2005-10-02 04:05:40 +00:00
|
|
|
<?php
|
2010-10-23 14:16:26 +00:00
|
|
|
/**
|
|
|
|
|
* Base classes for dumps and export
|
|
|
|
|
*
|
|
|
|
|
* Copyright © 2003, 2005, 2006 Brion Vibber <brion@pobox.com>
|
|
|
|
|
* http://www.mediawiki.org/
|
|
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
|
*
|
|
|
|
|
* @file
|
|
|
|
|
*/
|
2006-10-03 13:00:52 +00:00
|
|
|
|
WARNING: HUGE COMMIT
Doxygen documentation update:
* Changed alls @addtogroup to @ingroup. @addtogroup adds the comment to the group description, but doesn't add the file, class, function, ... to the group like @ingroup does. See for example http://svn.wikimedia.org/doc/group__SpecialPage.html where it's impossible to see related files, classes, ... that should belong to that group.
* Added @file to file description, it seems that it should be explicitely decalred for file descriptions, otherwise doxygen will think that the comment document the first class, variabled, function, ... that is in that file.
* Removed some empty comments
* Removed some ?>
Added following groups:
* ExternalStorage
* JobQueue
* MaintenanceLanguage
One more thing: there are still a lot of warnings when generating the doc.
2008-05-20 17:13:28 +00:00
|
|
|
/**
|
|
|
|
|
* @defgroup Dump Dump
|
|
|
|
|
*/
|
Some small doc tweaks to reduce Doxygen warnings, namely:
* @link. You might think @link would surely mean "here comes a web URL" ... but @link is a valid command
in Doxygen, which means an entirely different kind of link (an internal link to somewhere, so that you can separate
documentation and implementation). The result is a mess, and the best solution I can see is to use "@see" instead of "@link".
* Warning: argument `nourl' of command @param is not found in the argument list of Linker::makeMediaLinkObj($title,$text='')
* Moving few class descriptions to right above classes, and/or formatting into Javadoc style.
* "@addtogroup Special Pages" --> "@addtogroup SpecialPage" so that all special pages have the same @addtogroup tag.
* @fixme --> @todo (must have missed these before)
* "@param $specialPage @see" remove the "@" in the "@see" to stop warning.
* @throws wants type, then a brief description, to stop warning.
This last one is for PHPdocumentor only, but it fixes something for PHPDocumentor, and should be neutral for Doxygen:
* WARNING in includes/api/ApiFormatYaml_spyc.php on line 860: docblock template never terminated with /**#@-*/
2007-04-18 09:50:10 +00:00
|
|
|
|
2005-10-02 04:05:40 +00:00
|
|
|
/**
|
WARNING: HUGE COMMIT
Doxygen documentation update:
* Changed alls @addtogroup to @ingroup. @addtogroup adds the comment to the group description, but doesn't add the file, class, function, ... to the group like @ingroup does. See for example http://svn.wikimedia.org/doc/group__SpecialPage.html where it's impossible to see related files, classes, ... that should belong to that group.
* Added @file to file description, it seems that it should be explicitely decalred for file descriptions, otherwise doxygen will think that the comment document the first class, variabled, function, ... that is in that file.
* Removed some empty comments
* Removed some ?>
Added following groups:
* ExternalStorage
* JobQueue
* MaintenanceLanguage
One more thing: there are still a lot of warnings when generating the doc.
2008-05-20 17:13:28 +00:00
|
|
|
* @ingroup SpecialPage Dump
|
2005-10-02 04:05:40 +00:00
|
|
|
*/
|
|
|
|
|
class WikiExporter {
|
2006-03-23 08:50:31 +00:00
|
|
|
var $list_authors = false ; # Return distinct author list (when not returning full history)
|
|
|
|
|
var $author_list = "" ;
|
2008-04-14 07:45:50 +00:00
|
|
|
|
2008-03-04 02:03:52 +00:00
|
|
|
var $dumpUploads = false;
|
2011-04-12 19:25:56 +00:00
|
|
|
var $dumpUploadFileContents = false;
|
2007-01-17 22:32:40 +00:00
|
|
|
|
2009-03-02 19:48:41 +00:00
|
|
|
const FULL = 1;
|
|
|
|
|
const CURRENT = 2;
|
|
|
|
|
const STABLE = 4; // extension defined
|
|
|
|
|
const LOGS = 8;
|
2011-11-02 07:58:43 +00:00
|
|
|
const RANGE = 16;
|
2006-10-03 13:00:52 +00:00
|
|
|
|
|
|
|
|
const BUFFER = 0;
|
|
|
|
|
const STREAM = 1;
|
|
|
|
|
|
|
|
|
|
const TEXT = 0;
|
|
|
|
|
const STUB = 1;
|
|
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
var $buffer;
|
|
|
|
|
|
|
|
|
|
var $text;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @var DumpOutput
|
|
|
|
|
*/
|
|
|
|
|
var $sink;
|
|
|
|
|
|
2005-10-02 04:05:40 +00:00
|
|
|
/**
 * Set up an exporter on the given database connection.
 *
 * When WikiExporter::STREAM is used to stream a large amount of data,
 * pass a connection that is not managed by LoadBalancer: some history
 * blob types issue additional queries to pull their source data while
 * the main query is still running.
 *
 * @param $db DatabaseBase
 * @param $history Mixed: one of WikiExporter::FULL, WikiExporter::CURRENT,
 *                 WikiExporter::RANGE or WikiExporter::STABLE,
 *                 or an associative array:
 *                   offset: non-inclusive offset at which to start the query
 *                   limit: maximum number of rows to return
 *                   dir: "asc" or "desc" timestamp order
 * @param $buffer Int: one of WikiExporter::BUFFER or WikiExporter::STREAM
 * @param $text Int: one of WikiExporter::TEXT or WikiExporter::STUB
 */
function __construct( &$db, $history = WikiExporter::CURRENT,
		$buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
	// Query configuration supplied by the caller.
	$this->db =& $db;
	$this->history = $history;
	$this->buffer = $buffer;
	$this->text = $text;

	// Default collaborators: the XML writer produces the markup and the
	// sink receives it; the sink can be swapped via setOutputSink().
	$this->writer = new XmlDumpWriter();
	$this->sink = new DumpOutput();
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2005-10-02 04:05:40 +00:00
|
|
|
/**
 * Replace the object that receives the exporter's output.
 *
 * The sink is a DumpOutput or DumpFilter; it is handed the row objects
 * and the XML generated for them. Filters can be chained or used as
 * callbacks.
 *
 * @param $sink mixed Bound by reference, so the exporter and the caller
 *   share the same variable.
 */
public function setOutputSink( &$sink ) {
	$this->sink =& $sink;
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2008-09-18 16:41:43 +00:00
|
|
|
/**
 * Start the dump: pass whatever the writer's openStream() returns to
 * the sink's writeOpenStream().
 */
public function openStream() {
	$this->sink->writeOpenStream( $this->writer->openStream() );
}
|
|
|
|
|
|
2008-09-18 16:41:43 +00:00
|
|
|
/**
 * Finish the dump: pass whatever the writer's closeStream() returns to
 * the sink's writeCloseStream().
 */
public function closeStream() {
	$this->sink->writeCloseStream( $this->writer->closeStream() );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Dump page and revision records for every page in the database.
 * Whether complete history or only the most recent version is included
 * depends on the history mode the exporter was constructed with.
 */
public function allPages() {
	// An empty condition means "no restriction" for dumpFrom().
	$unrestricted = '';
	$this->dumpFrom( $unrestricted );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2005-10-02 04:05:40 +00:00
|
|
|
/**
 * Dump page and revision records for the pages whose page_id lies in
 * the given range.
 *
 * @param $start Int: inclusive lower limit (this id is included)
 * @param $end Int: exclusive upper limit (this id is not included).
 *   Any falsy value (e.g. 0) means no upper limit.
 */
public function pagesByRange( $start, $end ) {
	$clauses = array( 'page_id >= ' . intval( $start ) );
	if ( $end ) {
		$clauses[] = 'page_id < ' . intval( $end );
	}
	$this->dumpFrom( implode( ' AND ', $clauses ) );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2011-11-02 07:58:43 +00:00
|
|
|
/**
 * Dump page and revision records for the revisions whose rev_id lies
 * in the given range.
 *
 * @param $start Int: inclusive lower limit (this id is included)
 * @param $end Int: exclusive upper limit (this id is not included).
 *   Any falsy value (e.g. 0) means no upper limit.
 */
public function revsByRange( $start, $end ) {
	$lowerBound = 'rev_id >= ' . intval( $start );
	// The upper bound is optional; omit the clause entirely when unset.
	$upperBound = $end ? ' AND rev_id < ' . intval( $end ) : '';
	$this->dumpFrom( $lowerBound . $upperBound );
}
|
|
|
|
|
|
2005-10-02 04:05:40 +00:00
|
|
|
/**
 * Dump the single page identified by a Title object.
 *
 * @param $title Title
 */
public function pageByTitle( $title ) {
	$namespaceClause = 'page_namespace=' . $title->getNamespace();
	// The DB key is text, so it goes through the connection's quoting.
	$titleClause = 'page_title=' . $this->db->addQuotes( $title->getDBkey() );
	$this->dumpFrom( $namespaceClause . ' AND ' . $titleClause );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Dump the single page identified by its textual name.
 *
 * @param $name string Page name, parsed with Title::newFromText()
 * @throws MWException if the name does not yield a Title
 */
public function pageByName( $name ) {
	$title = Title::newFromText( $name );
	// Guard clause: refuse names that cannot be turned into a Title.
	if ( $title === null ) {
		throw new MWException( "Can't export invalid title" );
	}
	$this->pageByTitle( $title );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Dump several pages, one after another, by name.
 *
 * Each entry is passed to pageByName(), so the first name that fails
 * there aborts the loop with its exception.
 *
 * @param $names array List of page names
 */
public function pagesByName( $names ) {
	foreach ( $names as $pageName ) {
		$this->pageByName( $pageName );
	}
}
|
|
|
|
|
|
2008-09-18 16:41:43 +00:00
|
|
|
/**
 * Dump without any restricting condition.
 *
 * dumpFrom() only produces log entries when the exporter's history mode
 * has the WikiExporter::LOGS bit set, so this is meant for exporters
 * constructed in that mode.
 */
public function allLogs() {
	// An empty condition means "no restriction" for dumpFrom().
	$unrestricted = '';
	$this->dumpFrom( $unrestricted );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Dump the rows whose log_id lies in the given range.
 *
 * @param $start int Inclusive lower limit
 * @param $end int Exclusive upper limit; any falsy value (e.g. 0)
 *   means no upper limit
 */
public function logsByRange( $start, $end ) {
	$firstId = intval( $start );
	$condition = "log_id >= $firstId";
	if ( $end ) {
		$stopId = intval( $end );
		$condition = "$condition AND log_id < $stopId";
	}
	$this->dumpFrom( $condition );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Generates the distinct list of authors of an article and stores it,
 * as a <contributors> XML fragment, in $this->author_list.
 *
 * Not called by default (depends on $this->list_authors).
 * Can be set by Special:Export when not exporting whole history.
 * Revisions whose user name is hidden (Revision::DELETED_USER bit set in
 * rev_deleted) are left out.
 *
 * @param $cond string SQL condition selecting the page(s) to list
 *   authors for
 */
protected function do_list_authors( $cond ) {
	wfProfileIn( __METHOD__ );
	$this->author_list = "<contributors>";

	$res = $this->db->select(
		array( 'page', 'revision' ),
		array( 'DISTINCT rev_user_text', 'rev_user' ),
		array(
			// rev_deleted: skip revisions whose author has been hidden
			$this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0',
			$cond,
			// Tie each revision to its page. The previous condition compared
			// page_id with rev_id (the revision's own id), which matches
			// unrelated rows; every other query in this class joins on
			// rev_page.
			'page_id = rev_page',
		),
		__METHOD__
	);

	foreach ( $res as $row ) {
		// htmlspecialchars() escapes exactly the characters XML requires.
		// htmlentities() would also emit HTML-only named entities, which
		// are undefined in XML.
		$this->author_list .= "<contributor>" .
			"<username>" .
			htmlspecialchars( $row->rev_user_text ) .
			"</username>" .
			"<id>" .
			$row->rev_user .
			"</id>" .
			"</contributor>";
	}
	$this->author_list .= "</contributors>";
	wfProfileOut( __METHOD__ );
}
|
|
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Run the export query and stream its rows to the output sink.
 *
 * When $this->history has the WikiExporter::LOGS bit set, rows from the
 * logging table (joined with user) are sent to outputLogStream().
 * Otherwise a page/revision query is built according to the history mode
 * (array of offset/limit/dir, FULL, CURRENT, STABLE or RANGE) and its rows
 * are sent to outputPageStream().
 *
 * In WikiExporter::STREAM mode the connection is switched to unbuffered
 * results for the duration of the query and switched back afterwards,
 * including when an exception interrupts the dump.
 *
 * @param $cond string SQL condition restricting the dump; '' for none
 * @throws MWException if the history dump type is not recognised, or if
 *   no hook handled a STABLE dump
 * @throws Exception anything thrown while querying or writing is
 *   rethrown after the result set was freed and the buffer mode restored
 */
protected function dumpFrom( $cond = '' ) {
	wfProfileIn( __METHOD__ );
	# For logging dumps...
	// $this->history may be an associative array (offset/limit/dir); only
	// test the LOGS bit on scalar modes, since bitwise operators reject
	// array operands on PHP 8.
	if ( !is_array( $this->history ) && ( $this->history & self::LOGS ) ) {
		$where = array( 'user_id = log_user' );
		# Hide private logs
		$hideLogs = LogEventsList::getExcludeClause( $this->db );
		if ( $hideLogs ) {
			$where[] = $hideLogs;
		}
		# Add on any caller specified conditions
		if ( $cond ) {
			$where[] = $cond;
		}
		# Get logging table name for logging.* clause
		$logging = $this->db->tableName( 'logging' );

		if ( $this->buffer == WikiExporter::STREAM ) {
			$prev = $this->db->bufferResults( false );
		}
		$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
		try {
			$result = $this->db->select( array( 'logging', 'user' ),
				array( "{$logging}.*", 'user_name' ), // grab the user name
				$where,
				__METHOD__,
				array( 'ORDER BY' => 'log_id', 'USE INDEX' => array( 'logging' => 'PRIMARY' ) )
			);
			$wrapper = $this->db->resultObject( $result );
			$this->outputLogStream( $wrapper );
			if ( $this->buffer == WikiExporter::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		} catch ( Exception $e ) {
			// Throwing the exception does not reliably free the resultset, and
			// would also leave the connection in unbuffered mode.

			// Freeing result
			try {
				if ( $wrapper ) {
					$wrapper->free();
				}
			} catch ( Exception $e2 ) {
				// Already in panic mode -> ignoring $e2 as $e has
				// higher priority
			}

			// Putting database back in previous buffer mode
			try {
				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e2 ) {
				// Already in panic mode -> ignoring $e2 as $e has
				// higher priority
			}

			// Close the profiling section opened above, as the other
			// throw sites in this method do, then inform caller about problem
			wfProfileOut( __METHOD__ );
			throw $e;
		}
	# For page dumps...
	} else {
		$tables = array( 'page', 'revision' );
		$opts = array( 'ORDER BY' => 'page_id ASC' );
		$opts['USE INDEX'] = array();
		$join = array();
		if ( is_array( $this->history ) ) {
			# Time offset/limit for all pages/history...
			$revJoin = 'page_id=rev_page';
			# Set time order
			if ( $this->history['dir'] == 'asc' ) {
				$op = '>';
				$opts['ORDER BY'] = 'rev_timestamp ASC';
			} else {
				$op = '<';
				$opts['ORDER BY'] = 'rev_timestamp DESC';
			}
			# Set offset
			if ( !empty( $this->history['offset'] ) ) {
				$revJoin .= " AND rev_timestamp $op " .
					$this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) );
			}
			$join['revision'] = array( 'INNER JOIN', $revJoin );
			# Set query limit
			if ( !empty( $this->history['limit'] ) ) {
				$opts['LIMIT'] = intval( $this->history['limit'] );
			}
		} elseif ( $this->history & WikiExporter::FULL ) {
			# Full history dumps...
			$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
		} elseif ( $this->history & WikiExporter::CURRENT ) {
			# Latest revision dumps...
			if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
				$this->do_list_authors( $cond );
			}
			$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
		} elseif ( $this->history & WikiExporter::STABLE ) {
			# "Stable" revision dumps...
			# Default JOIN, to be overridden...
			$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' );
			# One, and only one hook should set this, and return false
			if ( wfRunHooks( 'WikiExporter::dumpStableQuery', array( &$tables, &$opts, &$join ) ) ) {
				wfProfileOut( __METHOD__ );
				throw new MWException( __METHOD__ . " given invalid history dump type." );
			}
		} elseif ( $this->history & WikiExporter::RANGE ) {
			# Dump of revisions within a specified range
			$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
			$opts['ORDER BY'] = array( 'rev_page ASC', 'rev_id ASC' );
		} else {
			# Unknown history specification parameter?
			wfProfileOut( __METHOD__ );
			throw new MWException( __METHOD__ . " given invalid history dump type." );
		}
		# Query optimization hacks
		if ( $cond == '' ) {
			$opts[] = 'STRAIGHT_JOIN';
			$opts['USE INDEX']['page'] = 'PRIMARY';
		}
		# Build text join options
		if ( $this->text != WikiExporter::STUB ) { // 1-pass
			$tables[] = 'text';
			$join['text'] = array( 'INNER JOIN', 'rev_text_id=old_id' );
		}

		if ( $this->buffer == WikiExporter::STREAM ) {
			$prev = $this->db->bufferResults( false );
		}

		$wrapper = null; // Assuring $wrapper is not undefined, if exception occurs early
		try {
			wfRunHooks( 'ModifyExportQuery',
					array( $this->db, &$tables, &$cond, &$opts, &$join ) );

			# Do the query!
			$result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join );
			$wrapper = $this->db->resultObject( $result );
			# Output dump results
			$this->outputPageStream( $wrapper );

			if ( $this->buffer == WikiExporter::STREAM ) {
				$this->db->bufferResults( $prev );
			}
		} catch ( Exception $e ) {
			// Throwing the exception does not reliably free the resultset, and
			// would also leave the connection in unbuffered mode.

			// Freeing result
			try {
				if ( $wrapper ) {
					$wrapper->free();
				}
			} catch ( Exception $e2 ) {
				// Already in panic mode -> ignoring $e2 as $e has
				// higher priority
			}

			// Putting database back in previous buffer mode
			try {
				if ( $this->buffer == WikiExporter::STREAM ) {
					$this->db->bufferResults( $prev );
				}
			} catch ( Exception $e2 ) {
				// Already in panic mode -> ignoring $e2 as $e has
				// higher priority
			}

			// Close the profiling section opened above, as the other
			// throw sites in this method do, then inform caller about problem
			wfProfileOut( __METHOD__ );
			throw $e;
		}
	}
	wfProfileOut( __METHOD__ );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2005-10-02 04:05:40 +00:00
|
|
|
/**
 * Walks a query result set and emits page and revision records.
 *
 * A new <page> is opened whenever the namespace or title differs from
 * the previous row, so the result set should be sorted/grouped by page
 * to avoid duplicate page records in the output.
 *
 * Should be safe for streaming (non-buffered) queries, as long as the
 * query was made on a separate database connection not managed by
 * LoadBalancer; some blob storage types will make queries to pull
 * source data.
 *
 * @param $resultset ResultWrapper
 */
protected function outputPageStream( $resultset ) {
	$currentPage = null;
	foreach ( $resultset as $row ) {
		$samePage = !is_null( $currentPage )
			&& $currentPage->page_namespace == $row->page_namespace
			&& $currentPage->page_title == $row->page_title;

		if ( !$samePage ) {
			// Finish the page we were in (if any) before starting the next.
			if ( $currentPage !== null ) {
				$closing = '';
				if ( $this->dumpUploads ) {
					$closing .= $this->writer->writeUploads( $currentPage, $this->dumpUploadFileContents );
				}
				$closing .= $this->writer->closePage();
				$this->sink->writeClosePage( $closing );
			}
			$opening = $this->writer->openPage( $row );
			$this->sink->writeOpenPage( $row, $opening );
			$currentPage = $row;
		}

		$revisionXml = $this->writer->writeRevision( $row );
		$this->sink->writeRevision( $row, $revisionXml );
	}

	// Close the last page. Only this final close carries the author list.
	if ( $currentPage !== null ) {
		$closing = '';
		if ( $this->dumpUploads ) {
			$closing .= $this->writer->writeUploads( $currentPage, $this->dumpUploadFileContents );
		}
		$closing .= $this->author_list;
		$closing .= $this->writer->closePage();
		$this->sink->writeClosePage( $closing );
	}
}
|
2011-04-07 21:04:16 +00:00
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Walks a result set of log rows, rendering each with the writer and
 * passing row and rendered output to the sink.
 *
 * @param $resultset array Iterable set of log rows (dumpFrom() passes
 *   the wrapped query result)
 */
protected function outputLogStream( $resultset ) {
	foreach ( $resultset as $logRow ) {
		$itemXml = $this->writer->writeLogItem( $logRow );
		$this->sink->writeLogItem( $logRow, $itemXml );
	}
}
|
2005-10-02 04:05:40 +00:00
|
|
|
}
|
|
|
|
|
|
2007-04-20 08:55:14 +00:00
|
|
|
/**
|
WARNING: HUGE COMMIT
Doxygen documentation update:
* Changed alls @addtogroup to @ingroup. @addtogroup adds the comment to the group description, but doesn't add the file, class, function, ... to the group like @ingroup does. See for example http://svn.wikimedia.org/doc/group__SpecialPage.html where it's impossible to see related files, classes, ... that should belong to that group.
* Added @file to file description, it seems that it should be explicitely decalred for file descriptions, otherwise doxygen will think that the comment document the first class, variabled, function, ... that is in that file.
* Removed some empty comments
* Removed some ?>
Added following groups:
* ExternalStorage
* JobQueue
* MaintenanceLanguage
One more thing: there are still a lot of warnings when generating the doc.
2008-05-20 17:13:28 +00:00
|
|
|
* @ingroup Dump
|
2007-04-20 08:55:14 +00:00
|
|
|
*/
|
2005-10-02 04:05:40 +00:00
|
|
|
class XmlDumpWriter {
|
|
|
|
|
/**
 * Returns the version of the export schema this writer produces.
 * @return string
 */
function schemaVersion() {
	$version = "0.7";
	return $version;
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2005-10-02 04:05:40 +00:00
|
|
|
/**
 * Opens the XML output stream's root <mediawiki> element, followed by
 * the site info section.
 *
 * No xml directive is emitted, so the result is safe to include as a
 * subelement in a larger XML stream. Namespace and XML Schema
 * references are included.
 *
 * Output will be encoded in UTF-8.
 *
 * @return string
 */
function openStream() {
	global $wgLanguageCode;

	$ver = $this->schemaVersion();
	$attribs = array(
		'xmlns' => "http://www.mediawiki.org/xml/export-$ver/",
		'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance",
		'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
			"http://www.mediawiki.org/xml/export-$ver.xsd",
		'version' => $ver,
		'xml:lang' => $wgLanguageCode,
	);

	// Contents are passed as null, exactly as before.
	$rootTag = Xml::element( 'mediawiki', $attribs, null );
	return $rootTag . "\n" . $this->siteInfo();
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Builds the <siteinfo> section from the site name, base link,
 * generator, case setting and namespace list, in that order.
 *
 * @return string
 */
function siteInfo() {
	$parts = array(
		$this->sitename(),
		$this->homelink(),
		$this->generator(),
		$this->caseSetting(),
		$this->namespaces(),
	);
	$body = implode( "\n ", $parts );
	return " <siteinfo>\n " . $body . "\n </siteinfo>\n";
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Builds the <sitename> element from $wgSitename.
 *
 * @return string
 */
function sitename() {
	global $wgSitename;

	$element = Xml::element( 'sitename', array(), $wgSitename );
	return $element;
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Builds the <generator> element naming the MediaWiki version
 * ($wgVersion) that produced the dump.
 *
 * @return string
 */
function generator() {
	global $wgVersion;

	$label = "MediaWiki $wgVersion";
	return Xml::element( 'generator', array(), $label );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Builds the <base> element holding the canonical URL of the main page.
 *
 * @return string
 */
function homelink() {
	$mainPage = Title::newMainPage();
	$url = $mainPage->getCanonicalUrl();
	return Xml::element( 'base', array(), $url );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Builds the <case> element describing the title case setting derived
 * from $wgCapitalLinks.
 *
 * @return string
 */
function caseSetting() {
	global $wgCapitalLinks;

	// "case-insensitive" option is reserved for future
	if ( $wgCapitalLinks ) {
		$sensitivity = 'first-letter';
	} else {
		$sensitivity = 'case-sensitive';
	}
	return Xml::element( 'case', array(), $sensitivity );
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Builds the <namespaces> section: one <namespace> element per entry of
 * the content language's formatted namespaces, carrying its key and its
 * case setting.
 *
 * @return string
 */
function namespaces() {
	global $wgContLang;

	$xml = "<namespaces>\n";
	foreach ( $wgContLang->getFormattedNamespaces() as $ns => $title ) {
		$caseMode = MWNamespace::isCapitalized( $ns ) ? 'first-letter' : 'case-sensitive';
		$attribs = array(
			'key' => $ns,
			'case' => $caseMode,
		);
		$xml .= ' ' . Xml::element( 'namespace', $attribs, $title ) . "\n";
	}
	$xml .= " </namespaces>";
	return $xml;
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2005-10-02 04:05:40 +00:00
|
|
|
/**
 * Returns the closing tag of the root element.
 * Call when finished dumping things.
 *
 * @return string
 */
function closeStream() {
	$closingTag = "</mediawiki>\n";
	return $closingTag;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Opens a <page> section on the output stream, with data
 * from the given database row.
 *
 * Emits the title, namespace and page id; a <redirect> element when the
 * row is flagged as a redirect and its target is a valid redirect
 * target; and a <restrictions> element when page_restrictions is
 * non-empty. The XmlDumpWriterOpenPage hook may then alter the output.
 *
 * @param $row object
 * @return string
 * @access private
 */
function openPage( $row ) {
	$title = Title::makeTitle( $row->page_namespace, $row->page_title );

	$out = " <page>\n";
	$out .= ' ' . Xml::elementClean( 'title', array(), self::canonicalTitle( $title ) ) . "\n";
	$out .= ' ' . Xml::element( 'ns', array(), strval( $row->page_namespace ) ) . "\n";
	$out .= ' ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";

	if ( $row->page_is_redirect ) {
		$redirect = WikiPage::factory( $title )->getRedirectTarget();
		if ( $redirect instanceof Title && $redirect->isValidRedirectTarget() ) {
			$redirectAttribs = array( 'title' => self::canonicalTitle( $redirect ) );
			$out .= ' ' . Xml::element( 'redirect', $redirectAttribs ) . "\n";
		}
	}

	if ( $row->page_restrictions != '' ) {
		$restrictions = strval( $row->page_restrictions );
		$out .= ' ' . Xml::element( 'restrictions', array(), $restrictions ) . "\n";
	}

	// $out is handed to hook handlers by reference.
	wfRunHooks( 'XmlDumpWriterOpenPage', array( $this, &$out, $row, $title ) );

	return $out;
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2005-10-02 04:05:40 +00:00
|
|
|
/**
 * Returns the markup that closes a <page> section.
 *
 * @access private
 * @return string The closing </page> tag with its leading indent and a
 *   trailing newline
 */
function closePage() {
	$closingTag = " </page>";

	return $closingTag . "\n";
}
|
2006-01-07 13:31:29 +00:00
|
|
|
|
2005-10-02 04:05:40 +00:00
|
|
|
/**
 * Dumps a <revision> section, with data filled in from the given
 * database row.
 *
 * Contributor, comment and text are replaced by placeholder elements with
 * a deleted="deleted" attribute when the matching Revision::DELETED_* bit
 * is set in rev_deleted. The text element holds the full revision text when
 * the row carries old_text; otherwise a stub element pointing at
 * rev_text_id is written. The 'XmlDumpWriterWriteRevision' hook runs just
 * before the closing tag and receives the output by reference.
 *
 * @param $row object Row providing rev_id, rev_timestamp, rev_deleted,
 *   rev_user, rev_user_text, rev_minor_edit, rev_comment, rev_sha1, rev_len,
 *   rev_text_id and, optionally, old_text
 * @return string
 * @access private
 */
function writeRevision( $row ) {
	wfProfileIn( __METHOD__ );

	$out = " <revision>\n";
	$out .= " " . Xml::element( 'id', null, strval( $row->rev_id ) ) . "\n";

	$out .= $this->writeTimestamp( $row->rev_timestamp );

	if ( $row->rev_deleted & Revision::DELETED_USER ) {
		$out .= " " . Xml::element( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
	} else {
		$out .= $this->writeContributor( $row->rev_user, $row->rev_user_text );
	}

	if ( $row->rev_minor_edit ) {
		$out .= " <minor/>\n";
	}
	if ( $row->rev_deleted & Revision::DELETED_COMMENT ) {
		$out .= " " . Xml::element( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
	} elseif ( $row->rev_comment != '' ) {
		$out .= " " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
	}

	// The hash is only exposed when the text itself is visible.
	if ( $row->rev_sha1 && !( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
		$out .= " " . Xml::element( 'sha1', null, strval( $row->rev_sha1 ) ) . "\n";
	} else {
		$out .= " <sha1/>\n";
	}

	// Stays empty for deleted and stub output; handed to the hook below.
	$text = '';
	if ( $row->rev_deleted & Revision::DELETED_TEXT ) {
		$out .= " " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
	} elseif ( isset( $row->old_text ) ) {
		// Raw text from the database may have invalid chars
		$text = strval( Revision::getRevisionText( $row ) );
		$out .= " " . Xml::elementClean( 'text',
			array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
			strval( $text ) ) . "\n";
	} else {
		// Stub output
		$out .= " " . Xml::element( 'text',
			array( 'id' => $row->rev_text_id, 'bytes' => intval( $row->rev_len ) ),
			"" ) . "\n";
	}

	// Handlers may declare the writer parameter by reference, so a reference
	// is still passed. It is taken on a local alias rather than on $this:
	// PHP 7.1 tightened the rules for references to $this.
	$writer = $this;
	wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$writer, &$out, $row, $text ) );

	$out .= " </revision>\n";

	wfProfileOut( __METHOD__ );
	return $out;
}
|
2011-04-07 21:04:16 +00:00
|
|
|
|
2008-09-18 00:02:57 +00:00
|
|
|
/**
 * Dumps a <logitem> section, with data filled in from the given
 * database row.
 *
 * Contributor, comment and the title/params pair are each replaced by a
 * placeholder element with a deleted="deleted" attribute when the matching
 * LogPage::DELETED_* bit is set in log_deleted.
 *
 * @param $row object Row providing log_id, log_timestamp, log_deleted,
 *   log_user, user_name, log_comment, log_type, log_action, log_namespace,
 *   log_title and log_params
 * @return string
 * @access private
 */
function writeLogItem( $row ) {
	wfProfileIn( __METHOD__ );

	$pad = " ";
	$hidden = array( 'deleted' => 'deleted' );

	$xml = " <logitem>\n";
	$xml .= $pad . Xml::element( 'id', null, strval( $row->log_id ) ) . "\n";
	$xml .= $this->writeTimestamp( $row->log_timestamp, " " );

	// Who performed the action
	if ( $row->log_deleted & LogPage::DELETED_USER ) {
		$xml .= $pad . Xml::element( 'contributor', $hidden ) . "\n";
	} else {
		$xml .= $this->writeContributor( $row->log_user, $row->user_name, " " );
	}

	// Log comment; left out entirely when empty
	if ( $row->log_deleted & LogPage::DELETED_COMMENT ) {
		$xml .= $pad . Xml::element( 'comment', $hidden ) . "\n";
	} elseif ( $row->log_comment != '' ) {
		$xml .= $pad . Xml::elementClean( 'comment', null, strval( $row->log_comment ) ) . "\n";
	}

	$xml .= $pad . Xml::element( 'type', null, strval( $row->log_type ) ) . "\n";
	$xml .= $pad . Xml::element( 'action', null, strval( $row->log_action ) ) . "\n";

	// Target title and parameters are hidden together
	if ( $row->log_deleted & LogPage::DELETED_ACTION ) {
		$xml .= $pad . Xml::element( 'text', $hidden ) . "\n";
	} else {
		$target = Title::makeTitle( $row->log_namespace, $row->log_title );
		$xml .= $pad . Xml::elementClean( 'logtitle', null, self::canonicalTitle( $target ) ) . "\n";

		$paramAttribs = array( 'xml:space' => 'preserve' );
		$xml .= $pad . Xml::elementClean( 'params', $paramAttribs, strval( $row->log_params ) ) . "\n";
	}

	$xml .= " </logitem>\n";

	wfProfileOut( __METHOD__ );
	return $xml;
}
|
2008-04-14 07:45:50 +00:00
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Builds a <timestamp> element holding the given time in ISO 8601 form.
 *
 * @param $timestamp string Timestamp in any format wfTimestamp() accepts
 * @param $indent string Text placed in front of the element
 * @return string The indented element followed by a newline
 */
function writeTimestamp( $timestamp, $indent = " " ) {
	$element = Xml::element(
		'timestamp',
		null,
		wfTimestamp( TS_ISO_8601, $timestamp )
	);

	return $indent . $element . "\n";
}
|
2008-04-14 07:45:50 +00:00
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Builds a <contributor> block.
 *
 * When $id is falsy and $text is a valid IP address, the contributor is
 * written as a single <ip> element. In every other case <username> and
 * <id> elements are written.
 *
 * @param $id User id; a falsy value allows the IP form
 * @param $text string User name or IP address
 * @param $indent string Text placed in front of the opening and closing tags
 * @return string
 */
function writeContributor( $id, $text, $indent = " " ) {
	$inner = $indent . " ";

	// IP::isValid() is only consulted when there is no user id.
	$isAnonymousIp = !$id && IP::isValid( $text );

	if ( $isAnonymousIp ) {
		$body = $inner . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
	} else {
		$body = $inner . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
		$body .= $inner . Xml::element( 'id', null, strval( $id ) ) . "\n";
	}

	return $indent . "<contributor>\n" . $body . $indent . "</contributor>\n";
}
|
2008-04-14 07:45:50 +00:00
|
|
|
|
2008-03-04 02:03:52 +00:00
|
|
|
/**
 * Dumps the <upload> sections for a page in the image namespace: every
 * version from the file's history, oldest first, then the current version.
 *
 * Warning! This data is potentially inconsistent. :(
 *
 * @param $row object Row providing page_namespace and page_title
 * @param $dumpContents bool Passed through to writeUpload()
 * @return string Empty when the page is not in the image namespace or the
 *   local file does not exist
 */
function writeUploads( $row, $dumpContents = false ) {
	if ( $row->page_namespace != NS_IMAGE ) {
		return '';
	}

	$current = wfLocalFile( $row->page_title );
	if ( !$current || !$current->exists() ) {
		return '';
	}

	$xml = '';
	// getHistory() output is reversed so older versions come first.
	$oldVersions = array_reverse( $current->getHistory() );
	foreach ( $oldVersions as $oldVersion ) {
		$xml .= $this->writeUpload( $oldVersion, $dumpContents );
	}
	$xml .= $this->writeUpload( $current, $dumpContents );

	return $xml;
}
|
2008-04-14 07:45:50 +00:00
|
|
|
|
2011-06-25 16:15:44 +00:00
|
|
|
/**
 * Builds the <upload> section for one version of a file.
 *
 * An <archivename> element is included only for old versions, and a
 * base64 <contents> element only when $dumpContents is set. The comment is
 * replaced by a placeholder with a deleted="deleted" attribute when
 * File::DELETED_COMMENT is set on the file.
 *
 * @param $file File
 * @param $dumpContents bool Whether to embed the file's bytes
 * @return string
 */
function writeUpload( $file, $dumpContents = false ) {
	$pad = " ";

	$archiveName = '';
	if ( $file->isOld() ) {
		$archiveName = $pad . Xml::element( 'archivename', null, $file->getArchiveName() ) . "\n";
	}

	$contents = '';
	if ( $dumpContents ) {
		# Dump file as base64
		# Uses only XML-safe characters, so does not need escaping
		$encoded = base64_encode( file_get_contents( $file->getPath() ) );
		$contents = ' <contents encoding="base64">' . chunk_split( $encoded ) . " </contents>\n";
	}

	$comment = $file->isDeleted( File::DELETED_COMMENT )
		? Xml::element( 'comment', array( 'deleted' => 'deleted' ) )
		: Xml::elementClean( 'comment', null, $file->getDescription() );

	$xml = " <upload>\n";
	$xml .= $this->writeTimestamp( $file->getTimestamp() );
	$xml .= $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) );
	$xml .= $pad . $comment . "\n";
	$xml .= $pad . Xml::element( 'filename', null, $file->getName() ) . "\n";
	$xml .= $archiveName;
	$xml .= $pad . Xml::element( 'src', null, $file->getCanonicalUrl() ) . "\n";
	$xml .= $pad . Xml::element( 'size', null, $file->getSize() ) . "\n";
	$xml .= $pad . Xml::element( 'sha1base36', null, $file->getSha1() ) . "\n";
	$xml .= $pad . Xml::element( 'rel', null, $file->getRel() ) . "\n";
	$xml .= $contents;
	$xml .= " </upload>\n";

	return $xml;
}
|
2005-10-02 04:05:40 +00:00
|
|
|
|
2011-11-22 19:33:53 +00:00
|
|
|
/**
 * Returns the prefixed text form of a title, built from the content
 * language's canonical namespace name.
 *
 * This deliberately bypasses special-casing such as gendered user
 * namespaces: useful as they are, they are not yet listed in the XML
 * <siteinfo> data and so are unsafe in an export. Interwiki titles are
 * returned through getPrefixedText() unchanged.
 *
 * @param Title $title
 * @return string
 * @since 1.18
 */
public static function canonicalTitle( Title $title ) {
	global $wgContLang;

	if ( $title->getInterwiki() ) {
		return $title->getPrefixedText();
	}

	$nsText = $wgContLang->getNsText( $title->getNamespace() );
	$nsName = str_replace( '_', ' ', $nsText );

	// An empty namespace name gets no "Namespace:" prefix.
	$prefix = ( $nsName !== '' ) ? $nsName . ':' : $nsName;

	return $prefix . $title->getText();
}
|
2005-10-02 04:05:40 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Base class for output streams; prints to stdout or a buffer or wherever.
 *
 * Every write*() method funnels into write(), so a subclass only needs to
 * override write() to redirect all output.
 *
 * @ingroup Dump
 */
class DumpOutput {

	/**
	 * @param $string string Markup that opens the stream
	 */
	public function writeOpenStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $string string Markup that closes the stream
	 */
	public function writeCloseStream( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $page Not used by this implementation
	 * @param $string string Markup that opens the page
	 */
	public function writeOpenPage( $page, $string ) {
		$this->write( $string );
	}

	/**
	 * @param $string string Markup that closes the page
	 */
	public function writeClosePage( $string ) {
		$this->write( $string );
	}

	/**
	 * @param $rev Not used by this implementation
	 * @param $string string Markup for the revision
	 */
	public function writeRevision( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * @param $rev Not used by this implementation
	 * @param $string string Markup for the log item
	 */
	public function writeLogItem( $rev, $string ) {
		$this->write( $string );
	}

	/**
	 * Prints the string to standard output.
	 * Override to write to a different stream type.
	 *
	 * @param $string string
	 */
	public function write( $string ) {
		echo $string;
	}

	/**
	 * Close the old file, move it to a specified name, and reopen a new
	 * file with the old name. Use this for writing out a file in multiple
	 * pieces at specified checkpoints (e.g. every n hours).
	 *
	 * This base implementation does nothing.
	 *
	 * @param $newname mixed File name. May be a string or an array with one element
	 */
	public function closeRenameAndReopen( $newname ) {
	}

	/**
	 * Close the old file, and move it to a specified name. Use this for the
	 * last piece of a file written out at specified checkpoints (e.g. every
	 * n hours).
	 *
	 * This base implementation does nothing.
	 *
	 * @param $newname mixed File name. May be a string or an array with one element
	 * @param $open bool If true, a new file with the old filename will be
	 *   opened again for writing (default: false)
	 */
	public function closeAndRename( $newname, $open = false ) {
	}

	/**
	 * Returns the name of the file or files which are being written to,
	 * if there are any.
	 *
	 * @return null This base implementation writes to no file
	 */
	public function getFilenames() {
		return null;
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Stream outputter to send data to a file.
 * @ingroup Dump
 */
class DumpFileOutput extends DumpOutput {
	/** Open file handle, or false while no file is open */
	protected $handle = false;

	/** Name of the file being written */
	protected $filename;

	/**
	 * Opens the file for writing.
	 *
	 * @param $file string Name of the file to write to
	 */
	public function __construct( $file ) {
		$this->handle = fopen( $file, "wt" );
		$this->filename = $file;
	}

	/**
	 * Writes the closing markup, then closes the file if it is still open.
	 *
	 * @param $string string
	 */
	public function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->handle ) {
			fclose( $this->handle );
			$this->handle = false;
		}
	}

	/**
	 * Writes the string to the open file handle.
	 *
	 * @param $string string
	 */
	public function write( $string ) {
		fputs( $this->handle, $string );
	}

	/**
	 * Closes the file, renames it, and reopens a fresh file under the
	 * original name.
	 *
	 * @param $newname mixed File name. May be a string or an array with one element
	 */
	public function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Renames the output file.
	 *
	 * @param $newname string Target file name
	 * @throws MWException When rename() fails
	 */
	public function renameOrException( $newname ) {
		if ( !rename( $this->filename, $newname ) ) {
			throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
		}
	}

	/**
	 * Normalises a rename target to a single file name.
	 *
	 * Non-array values are returned unchanged. An array must hold at most
	 * one name, which is read from index 0.
	 *
	 * @param $newname mixed File name, or an array with one element
	 * @return mixed The file name; null for an array with no element at
	 *   index 0, which callers treat as "nothing to rename"
	 * @throws MWException When the array holds more than one element
	 */
	public function checkRenameArgCount( $newname ) {
		if ( !is_array( $newname ) ) {
			return $newname;
		}

		if ( count( $newname ) > 1 ) {
			throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
		}

		// Guarded so that an empty array yields null without raising an
		// undefined-offset notice.
		return isset( $newname[0] ) ? $newname[0] : null;
	}

	/**
	 * Closes the file if open and renames it; optionally reopens a new file
	 * under the original name. Nothing happens when the normalised new name
	 * is empty.
	 *
	 * @param $newname mixed File name. May be a string or an array with one element
	 * @param $open bool Whether to reopen the original file name for writing
	 * @throws MWException When the rename fails or several names are passed
	 */
	public function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$this->handle = fopen( $this->filename, "wt" );
			}
		}
	}

	/**
	 * @return string|null Name of the file being written to
	 */
	public function getFilenames() {
		return $this->filename;
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Stream outputter to send data to a file via some filter program.
 * Even if compression is available in a library, using a separate
 * program can allow us to make use of a multi-processor system.
 * @ingroup Dump
 */
class DumpPipeOutput extends DumpFileOutput {
	/** Shell command as started by the constructor, including any redirect */
	protected $command;

	/** Name of the file the command's output goes to */
	protected $filename;

	/** Process resource from proc_open(), or false while none is running */
	protected $procOpenResource = false;

	/**
	 * Starts the filter command. When $file is given, the command's standard
	 * output is redirected to it. The parent constructor is not called; the
	 * handle is the pipe set up by startCommand().
	 *
	 * @param $command string Shell command to pipe the dump through
	 * @param $file string|null Output file, or null for no redirect
	 */
	public function __construct( $command, $file = null ) {
		if ( !is_null( $file ) ) {
			$command .= " > " . wfEscapeShellArg( $file );
		}

		$this->startCommand( $command );
		$this->command = $command;
		$this->filename = $file;
	}

	/**
	 * Writes the closing markup and closes the pipe (done by the parent),
	 * then closes the process if one is still open.
	 *
	 * @param $string string
	 */
	public function writeCloseStream( $string ) {
		parent::writeCloseStream( $string );
		if ( $this->procOpenResource ) {
			proc_close( $this->procOpenResource );
			$this->procOpenResource = false;
		}
	}

	/**
	 * Launches the command and uses the pipe to its standard input as the
	 * write handle.
	 *
	 * @param $command string Complete shell command
	 */
	public function startCommand( $command ) {
		$spec = array(
			0 => array( "pipe", "r" ),
		);
		$pipes = array();
		$this->procOpenResource = proc_open( $command, $spec, $pipes );
		// When proc_open() fails, $pipes has no entry 0. Fall back to the
		// "not open" marker rather than reading an undefined offset.
		$this->handle = isset( $pipes[0] ) ? $pipes[0] : false;
	}

	/**
	 * Closes the pipe and process, renames the output file, and restarts
	 * the command writing to the original file name.
	 *
	 * @param mixed $newname File name. May be a string or an array with one element
	 */
	public function closeRenameAndReopen( $newname ) {
		$this->closeAndRename( $newname, true );
	}

	/**
	 * Closes the pipe and the process if open, then renames the output
	 * file; optionally restarts the command writing to the original file
	 * name. Nothing happens when the normalised new name is empty.
	 *
	 * @param $newname mixed File name. May be a string or an array with one element
	 * @param $open bool Whether to restart the command afterwards
	 */
	public function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			if ( $this->procOpenResource ) {
				proc_close( $this->procOpenResource );
				$this->procOpenResource = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$command = $this->command;
				$redirect = " > " . wfEscapeShellArg( $this->filename );
				// The stored command already ends with this redirect when a
				// file was passed to the constructor; do not append it twice.
				if ( substr( $command, -strlen( $redirect ) ) !== $redirect ) {
					$command .= $redirect;
				}
				$this->startCommand( $command );
			}
		}
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Sends dump output via the gzip compressor.
 * @ingroup Dump
 */
class DumpGZipOutput extends DumpPipeOutput {

	/**
	 * Pipes the dump through the "gzip" command, with its output going to
	 * the given file.
	 *
	 * @param $file string Output file name
	 */
	public function __construct( $file ) {
		$compressor = "gzip";
		parent::__construct( $compressor, $file );
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Sends dump output via the bzip2 compressor.
 * @ingroup Dump
 */
class DumpBZip2Output extends DumpPipeOutput {

	/**
	 * Pipes the dump through the "bzip2" command, with its output going to
	 * the given file.
	 *
	 * @param $file string Output file name
	 */
	public function __construct( $file ) {
		$compressor = "bzip2";
		parent::__construct( $compressor, $file );
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Sends dump output via the p7zip compressor.
 * @ingroup Dump
 */
class Dump7ZipOutput extends DumpPipeOutput {

	/**
	 * Starts 7za reading from standard input and writing the archive named
	 * by $file. The file name is part of the command itself, so the parent
	 * constructor gets no redirect target; the name is recorded afterwards.
	 *
	 * @param $file string Archive file name
	 */
	public function __construct( $file ) {
		$command = $this->setup7zCommand( $file );
		parent::__construct( $command );
		$this->filename = $file;
	}

	/**
	 * Builds the 7za command line for the given archive.
	 *
	 * @param $file string Archive file name
	 * @return string Shell command
	 */
	public function setup7zCommand( $file ) {
		$command = "7za a -bd -si " . wfEscapeShellArg( $file );
		// Suppress annoying useless crap from p7zip
		// Unfortunately this could suppress real error messages too
		$command .= ' >' . wfGetNull() . ' 2>&1';
		return $command;
	}

	/**
	 * Closes the pipe and the 7za process if open, then renames the
	 * archive; optionally restarts 7za writing to the original file name.
	 * Nothing happens when the normalised new name is empty.
	 *
	 * @param $newname mixed File name. May be a string or an array with one element
	 * @param $open bool Whether to restart the command afterwards
	 */
	public function closeAndRename( $newname, $open = false ) {
		$newname = $this->checkRenameArgCount( $newname );
		if ( $newname ) {
			// Guard and reset both resources, as the parent classes do. This
			// keeps the call safe after writeCloseStream() has closed them,
			// and keeps a later close from hitting a stale resource.
			if ( $this->handle ) {
				fclose( $this->handle );
				$this->handle = false;
			}
			if ( $this->procOpenResource ) {
				proc_close( $this->procOpenResource );
				$this->procOpenResource = false;
			}
			$this->renameOrException( $newname );
			if ( $open ) {
				$command = $this->setup7zCommand( $this->filename );
				$this->startCommand( $command );
			}
		}
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Dump output filter class.
 * This just does output filtering and streaming; XML formatting is done
 * higher up, so be careful in what you do.
 *
 * Every call is relayed to the wrapped sink. Page-level calls
 * (writeOpenPage, writeRevision, writeClosePage) are relayed only while
 * pass() has accepted the page currently being written.
 * @ingroup Dump
 */
class DumpFilter {

	/**
	 * The sink that receives whatever this filter lets through.
	 *
	 * @var DumpOutput
	 * FIXME will need to be made protected whenever legacy code
	 * is updated.
	 */
	public $sink;

	/**
	 * Result of the last pass() check; reset to false once the page is closed.
	 *
	 * @var bool
	 */
	protected $sendingThisPage;

	/**
	 * @param $sink DumpOutput Bound by reference to the caller's variable
	 */
	public function __construct( &$sink ) {
		$this->sink =& $sink;
	}

	/**
	 * Relay the stream header to the sink unconditionally.
	 *
	 * @param $string string
	 */
	public function writeOpenStream( $string ) {
		$this->sink->writeOpenStream( $string );
	}

	/**
	 * Relay the stream footer to the sink unconditionally.
	 *
	 * @param $string string
	 */
	public function writeCloseStream( $string ) {
		$this->sink->writeCloseStream( $string );
	}

	/**
	 * Ask pass() whether this page is wanted and, if so, relay its opening.
	 *
	 * @param $page
	 * @param $string string
	 */
	public function writeOpenPage( $page, $string ) {
		$this->sendingThisPage = $this->pass( $page, $string );
		if ( !$this->sendingThisPage ) {
			return;
		}

		$this->sink->writeOpenPage( $page, $string );
	}

	/**
	 * Relay the page closing if the page was accepted, then clear the flag.
	 *
	 * @param $string string
	 */
	public function writeClosePage( $string ) {
		if ( !$this->sendingThisPage ) {
			return;
		}

		$this->sink->writeClosePage( $string );
		$this->sendingThisPage = false;
	}

	/**
	 * Relay a revision, but only inside a page that pass() accepted.
	 *
	 * @param $rev
	 * @param $string string
	 */
	public function writeRevision( $rev, $string ) {
		if ( !$this->sendingThisPage ) {
			return;
		}

		$this->sink->writeRevision( $rev, $string );
	}

	/**
	 * Relay a log item to the sink, regardless of any page state.
	 *
	 * NOTE(review): this hands the item to the sink's writeRevision(), not
	 * to a writeLogItem() method - confirm that is intended.
	 *
	 * @param $rev
	 * @param $string string
	 */
	public function writeLogItem( $rev, $string ) {
		$this->sink->writeRevision( $rev, $string );
	}

	/**
	 * @param $newname string
	 */
	public function closeRenameAndReopen( $newname ) {
		$this->sink->closeRenameAndReopen( $newname );
	}

	/**
	 * @param $newname string
	 * @param $open bool
	 */
	public function closeAndRename( $newname, $open = false ) {
		$this->sink->closeAndRename( $newname, $open );
	}

	/**
	 * @return array Whatever the sink reports
	 */
	public function getFilenames() {
		return $this->sink->getFilenames();
	}

	/**
	 * Override for page-based filter types.
	 * The base implementation accepts every page.
	 *
	 * @param $page
	 * @return bool
	 */
	public function pass( $page ) {
		return true;
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Simple dump output filter to exclude all talk pages.
 * @ingroup Dump
 */
class DumpNotalkFilter extends DumpFilter {

	/**
	 * Accept a page only when its namespace is not a talk namespace.
	 *
	 * @param $page Object whose page_namespace property is read
	 * @return bool
	 */
	public function pass( $page ) {
		$isTalk = MWNamespace::isTalk( $page->page_namespace );

		return !$isTalk;
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Dump output filter to include or exclude pages in a given set of namespaces.
 * @ingroup Dump
 */
class DumpNamespaceFilter extends DumpFilter {
	/** @var bool True when the namespace list is an exclusion list */
	public $invert = false;

	/** @var array Selected namespaces, stored as namespace index => true */
	public $namespaces = array();

	/**
	 * Parse a namespace specification and remember the selected namespaces.
	 *
	 * $param is a comma-separated list whose entries are either one of the
	 * NS_* constant names listed below or a numeric namespace index. A
	 * leading '!' inverts the filter, so the listed namespaces are excluded
	 * instead of included.
	 *
	 * @param $sink DumpOutput
	 * @param $param string
	 * @throws MWException When an entry is neither a known name nor numeric
	 */
	function __construct( &$sink, $param ) {
		parent::__construct( $sink );

		$constants = array(
			"NS_MAIN"           => NS_MAIN,
			"NS_TALK"           => NS_TALK,
			"NS_USER"           => NS_USER,
			"NS_USER_TALK"      => NS_USER_TALK,
			"NS_PROJECT"        => NS_PROJECT,
			"NS_PROJECT_TALK"   => NS_PROJECT_TALK,
			"NS_FILE"           => NS_FILE,
			"NS_FILE_TALK"      => NS_FILE_TALK,
			"NS_IMAGE"          => NS_IMAGE, // NS_IMAGE is an alias for NS_FILE
			"NS_IMAGE_TALK"     => NS_IMAGE_TALK,
			"NS_MEDIAWIKI"      => NS_MEDIAWIKI,
			"NS_MEDIAWIKI_TALK" => NS_MEDIAWIKI_TALK,
			"NS_TEMPLATE"       => NS_TEMPLATE,
			"NS_TEMPLATE_TALK"  => NS_TEMPLATE_TALK,
			"NS_HELP"           => NS_HELP,
			"NS_HELP_TALK"      => NS_HELP_TALK,
			"NS_CATEGORY"       => NS_CATEGORY,
			"NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );

		// Square brackets replace the {} string-offset syntax, which is a
		// parse error on PHP 8. The isset() guard avoids an "uninitialized
		// string offset" notice when $param is empty.
		if ( isset( $param[0] ) && $param[0] === '!' ) {
			$this->invert = true;
			$param = substr( $param, 1 );
		}

		foreach ( explode( ',', $param ) as $key ) {
			$key = trim( $key );
			if ( isset( $constants[$key] ) ) {
				$ns = $constants[$key];
			} elseif ( is_numeric( $key ) ) {
				$ns = intval( $key );
			} else {
				throw new MWException( "Unrecognized namespace key '$key'\n" );
			}
			$this->namespaces[$ns] = true;
		}
	}

	/**
	 * Accept a page when its namespace is in the list, or when it is not in
	 * the list and the filter is inverted.
	 *
	 * @param $page Object whose page_namespace property is read
	 * @return bool
	 */
	function pass( $page ) {
		$match = isset( $this->namespaces[$page->page_namespace] );

		return ( $this->invert xor $match );
	}
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Dump output filter to include only the last revision in each page sequence.
 *
 * The page header and the matching revision are buffered and written to the
 * sink only when the page is closed.
 * @ingroup Dump
 */
class DumpLatestFilter extends DumpFilter {
	/** Page object handed to the last writeOpenPage() call */
	public $page;

	/** Page header text buffered until the page is closed */
	public $pageString;

	/** Buffered revision whose rev_id equals the page's page_latest */
	public $rev;

	/** Text belonging to the buffered revision */
	public $revString;

	/**
	 * Buffer the page header instead of writing it straight away.
	 *
	 * @param $page
	 * @param $string string
	 */
	public function writeOpenPage( $page, $string ) {
		$this->pageString = $string;
		$this->page = $page;
	}

	/**
	 * Write the buffered page header, the buffered revision and the page
	 * footer if a revision was kept, then clear all buffered state.
	 *
	 * @param $string string
	 */
	public function writeClosePage( $string ) {
		if ( $this->rev ) {
			$this->sink->writeOpenPage( $this->page, $this->pageString );
			$this->sink->writeRevision( $this->rev, $this->revString );
			$this->sink->writeClosePage( $string );
		}

		$this->rev = $this->revString = $this->page = $this->pageString = null;
	}

	/**
	 * Keep the revision only when it is the page's latest one.
	 *
	 * @param $rev Object whose rev_id property is read
	 * @param $string string
	 */
	public function writeRevision( $rev, $string ) {
		$isLatest = ( $rev->rev_id == $this->page->page_latest );
		if ( !$isLatest ) {
			return;
		}

		$this->rev = $rev;
		$this->revString = $string;
	}
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fan-out writer: relays every dump call to each sink in a list.
 * @ingroup Dump
 */
class DumpMultiWriter {
	/**
	 * Sinks that receive every call, in the order given to the constructor.
	 * Declared explicitly so that no dynamic property is created.
	 * @var array
	 */
	public $sinks = array();

	/**
	 * Number of sinks, as counted in the constructor.
	 * @var int
	 */
	public $count = 0;

	/**
	 * @param $sinks array Sinks to write to
	 */
	function __construct( $sinks ) {
		$this->sinks = $sinks;
		$this->count = count( $sinks );
	}

	/**
	 * @param $string string
	 */
	function writeOpenStream( $string ) {
		foreach ( $this->sinks as $sink ) {
			$sink->writeOpenStream( $string );
		}
	}

	/**
	 * @param $string string
	 */
	function writeCloseStream( $string ) {
		foreach ( $this->sinks as $sink ) {
			$sink->writeCloseStream( $string );
		}
	}

	/**
	 * @param $page
	 * @param $string string
	 */
	function writeOpenPage( $page, $string ) {
		foreach ( $this->sinks as $sink ) {
			$sink->writeOpenPage( $page, $string );
		}
	}

	/**
	 * @param $string string
	 */
	function writeClosePage( $string ) {
		foreach ( $this->sinks as $sink ) {
			$sink->writeClosePage( $string );
		}
	}

	/**
	 * @param $rev
	 * @param $string string
	 */
	function writeRevision( $rev, $string ) {
		foreach ( $this->sinks as $sink ) {
			$sink->writeRevision( $rev, $string );
		}
	}

	/**
	 * Relay a log item to every sink.
	 *
	 * Without this method, calling writeLogItem() on a multi-writer fails
	 * with an undefined-method error.
	 * NOTE(review): assumes every sink implements writeLogItem() - confirm
	 * for all output classes.
	 *
	 * @param $rev
	 * @param $string string
	 */
	function writeLogItem( $rev, $string ) {
		foreach ( $this->sinks as $sink ) {
			$sink->writeLogItem( $rev, $string );
		}
	}

	/**
	 * Same as closeAndRename() with $open forced to true.
	 *
	 * @param $newnames array
	 */
	function closeRenameAndReopen( $newnames ) {
		$this->closeAndRename( $newnames, true );
	}

	/**
	 * Ask each sink to close and rename itself. The name for a sink is the
	 * entry of $newnames stored under the same key as that sink.
	 *
	 * @param $newnames array
	 * @param bool $open
	 */
	function closeAndRename( $newnames, $open = false ) {
		foreach ( $this->sinks as $i => $sink ) {
			$sink->closeAndRename( $newnames[$i], $open );
		}
	}

	/**
	 * Collect the getFilenames() result of every sink, one entry per sink.
	 *
	 * @return array
	 */
	function getFilenames() {
		$filenames = array();
		foreach ( $this->sinks as $sink ) {
			$filenames[] = $sink->getFilenames();
		}
		return $filenames;
	}

}
|
|
|
|
|
|
2012-04-28 22:47:23 +00:00
|
|
|
/**
 * Make a string safe to embed in XML output: normalize it with
 * UtfNormal::cleanUp(), then escape it with htmlspecialchars().
 *
 * @param $string string
 * @return string
 */
function xmlsafe( $string ) {
	wfProfileIn( __FUNCTION__ );

	/**
	 * The page may contain old data which has not been properly normalized.
	 * Invalid UTF-8 sequences or forbidden control characters will make our
	 * XML output invalid, so be sure to strip them out.
	 */
	$escaped = htmlspecialchars( UtfNormal::cleanUp( $string ) );

	wfProfileOut( __FUNCTION__ );
	return $escaped;
}
|