This makes BackupDumper compatible with the new mechanism for accessing revision content. This requires some changes to the way database connections are re-used, since RevisionStore/SqlBlobStore needs to be able to run queries against the database while the overall result set is being streamed. This change does not yet add handling for extra slots to BackupDumper. That first needs a spec for how extra slots will be represented in the XML schema (T174031). NOTE: this changes the output of fetchText from using integer text_id values to using content_address values (e.g. "tt:4567" for text row with old_id 4567). It also changes fetchText to accept such addresses as input, for forward-compatibility. XML stub dumps still use the numeric format in the id attribute, pending T199121. Bug: T198706 Change-Id: If4c31b7975b4d901afa8c194c10446c99e27eadf
100 lines
2.8 KiB
PHP
100 lines
2.8 KiB
PHP
<?php
|
|
/**
|
|
* Communications protocol.
|
|
* This is used by dumpTextPass.php when the --spawn option is present.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*
|
|
* @file
|
|
* @ingroup Maintenance
|
|
*/
|
|
|
|
require_once __DIR__ . '/Maintenance.php';
|
|
|
|
use MediaWiki\MediaWikiServices;
|
|
use MediaWiki\Storage\BlobAccessException;
|
|
use MediaWiki\Storage\SqlBlobStore;
|
|
|
|
/**
|
|
* Maintenance script used to fetch page text in a subprocess.
|
|
*
|
|
* @ingroup Maintenance
|
|
*/
|
|
class FetchText extends Maintenance {

	/**
	 * Registers the script's help text with the Maintenance base class.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( "Fetch the raw revision blob from a blob address.\n" .
			"Integer IDs are interpreted as referring to text.old_id for backwards compatibility.\n" .
			"NOTE: Export transformations are NOT applied. " .
			"This is left to dumpTextPass.php"
		);
	}

	/**
	 * Looks up the blob store through the service container.
	 *
	 * @return SqlBlobStore
	 */
	private function getBlobStore() {
		return MediaWikiServices::getInstance()->getBlobStore();
	}

	/**
	 * Turns one request line into the blob address that is looked up.
	 *
	 * Only strings made up entirely of decimal digits count as legacy text IDs.
	 * The previous is_numeric() check also accepted values such as "1.5", "1e3",
	 * "+7" or "-3", which intval() then truncated or converted, so a malformed
	 * request could silently resolve to a different text row. Such values are now
	 * passed on unchanged, exactly like any other non-integer input.
	 *
	 * @param string $requested Trimmed line read from stdin
	 * @return string Address built from the text ID, or $requested unchanged
	 */
	private function toBlobAddress( $requested ) {
		// ctype_digit() is false for the empty string, so blank lines fall through as-is.
		if ( ctype_digit( $requested ) ) {
			return SqlBlobStore::makeAddressFromTextId( intval( $requested ) );
		}

		return $requested;
	}

	/**
	 * Reads one blob address (or plain integer text ID) per line from stdin and,
	 * for each line, writes the following to the output, in order:
	 *   the blob address that was looked up
	 *   \n
	 *   length of text in bytes (-1 on error, i.e. when the blob store throws
	 *   BlobAccessException or InvalidArgumentException)
	 *   \n
	 *   text (may be empty)
	 *
	 * Note that the text string itself is *not* followed by a newline.
	 * The loop ends at end of input or as soon as a line cannot be read.
	 */
	public function execute() {
		$stdin = $this->getStdin();

		while ( !feof( $stdin ) ) {
			$line = fgets( $stdin );
			if ( $line === false ) {
				// We appear to have lost contact...
				break;
			}

			// Plain integers are supported for backwards compatibility with pre-MCR dumps.
			$blobAddress = $this->toBlobAddress( trim( $line ) );

			// Start from the failure reply; it is only overwritten when the lookup succeeds.
			$text = '';
			$textLen = '-1';
			try {
				$text = $this->getBlobStore()->getBlob( $blobAddress );
				$textLen = strlen( $text );
			} catch ( BlobAccessException $ex ) {
				// XXX: log $ex to stderr?
				// Failure is reported to the caller through the -1 length.
			} catch ( InvalidArgumentException $ex ) {
				// XXX: log $ex to stderr?
				// Failure is reported to the caller through the -1 length.
			}

			$this->output( $blobAddress . "\n" . $textLen . "\n" . $text );
		}
	}

}
|
|
|
|
// Name of the Maintenance subclass declared in this file.
// NOTE(review): presumably read by the file required below to decide which
// class to instantiate and run — confirm against Maintenance.php.
$maintClass = FetchText::class;
|
|
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is not defined in this file; it
// presumably comes from the Maintenance.php required at the top and names the
// bootstrap file that runs $maintClass when this script is the entry point — confirm.
require_once RUN_MAINTENANCE_IF_MAIN;
|