2008-09-18 16:41:43 +00:00
|
|
|
<?php
|
|
|
|
|
/**
|
2012-07-10 12:48:06 +00:00
|
|
|
* MediaWiki page data importer.
|
2010-08-08 14:23:14 +00:00
|
|
|
*
|
|
|
|
|
* Copyright © 2003,2005 Brion Vibber <brion@pobox.com>
|
2014-03-12 22:30:35 +00:00
|
|
|
* https://www.mediawiki.org/
|
2008-09-18 16:41:43 +00:00
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
|
*
|
|
|
|
|
* @file
|
|
|
|
|
* @ingroup SpecialPage
|
|
|
|
|
*/
|
|
|
|
|
|
2021-04-30 15:21:49 +00:00
|
|
|
use MediaWiki\Cache\CacheKeyHelper;
|
2021-05-14 23:16:34 +00:00
|
|
|
use MediaWiki\Content\IContentHandlerFactory;
|
|
|
|
|
use MediaWiki\HookContainer\HookContainer;
|
Hooks::run() call site migration
Migrate all callers of Hooks::run() to use the new
HookContainer/HookRunner system.
General principles:
* Use DI if it is already used. We're not changing the way state is
managed in this patch.
* HookContainer is always injected, not HookRunner. HookContainer
is a service, it's a more generic interface, it is the only
thing that provides isRegistered() which is needed in some cases,
and a HookRunner can be efficiently constructed from it
(confirmed by benchmark). Because HookContainer is needed
for object construction, it is also needed by all factories.
* "Ask your friendly local base class". Big hierarchies like
SpecialPage and ApiBase have getHookContainer() and getHookRunner()
methods in the base class, and classes that extend that base class
are not expected to know or care where the base class gets its
HookContainer from.
* ProtectedHookAccessorTrait provides protected getHookContainer() and
getHookRunner() methods, getting them from the global service
container. The point of this is to ease migration to DI by ensuring
that call sites ask their local friendly base class rather than
getting a HookRunner from the service container directly.
* Private $this->hookRunner. In some smaller classes where accessor
methods did not seem warranted, there is a private HookRunner property
which is accessed directly. Very rarely (two cases), there is a
protected property, for consistency with code that conventionally
assumes protected=private, but in cases where the class might actually
be overridden, a protected accessor is preferred over a protected
property.
* The last resort: Hooks::runner(). Mostly for static, file-scope and
global code. In a few cases it was used for objects with broken
construction schemes, out of horror or laziness.
Constructors with new required arguments:
* AuthManager
* BadFileLookup
* BlockManager
* ClassicInterwikiLookup
* ContentHandlerFactory
* ContentSecurityPolicy
* DefaultOptionsManager
* DerivedPageDataUpdater
* FullSearchResultWidget
* HtmlCacheUpdater
* LanguageFactory
* LanguageNameUtils
* LinkRenderer
* LinkRendererFactory
* LocalisationCache
* MagicWordFactory
* MessageCache
* NamespaceInfo
* PageEditStash
* PageHandlerFactory
* PageUpdater
* ParserFactory
* PermissionManager
* RevisionStore
* RevisionStoreFactory
* SearchEngineConfig
* SearchEngineFactory
* SearchFormWidget
* SearchNearMatcher
* SessionBackend
* SpecialPageFactory
* UserNameUtils
* UserOptionsManager
* WatchedItemQueryService
* WatchedItemStore
Constructors with new optional arguments:
* DefaultPreferencesFactory
* Language
* LinkHolderArray
* MovePage
* Parser
* ParserCache
* PasswordReset
* Router
setHookContainer() now required after construction:
* AuthenticationProvider
* ResourceLoaderModule
* SearchEngine
Change-Id: Id442b0dbe43aba84bd5cf801d86dedc768b082c7
2020-03-19 02:42:09 +00:00
|
|
|
use MediaWiki\HookContainer\HookRunner;
|
2018-07-29 12:24:54 +00:00
|
|
|
use MediaWiki\MediaWikiServices;
|
2021-04-30 15:21:49 +00:00
|
|
|
use MediaWiki\Page\PageIdentity;
|
2021-05-14 23:16:34 +00:00
|
|
|
use MediaWiki\Page\WikiPageFactory;
|
|
|
|
|
use MediaWiki\Permissions\PermissionManager;
|
2020-04-06 09:16:17 +00:00
|
|
|
use MediaWiki\Revision\SlotRecord;
|
2021-05-14 23:16:34 +00:00
|
|
|
use MediaWiki\Revision\SlotRoleRegistry;
|
2018-07-29 12:24:54 +00:00
|
|
|
|
2011-01-31 07:07:43 +00:00
|
|
|
/**
|
2015-12-28 22:27:48 +00:00
|
|
|
* XML file reader for the page data importer.
|
2011-01-31 07:07:43 +00:00
|
|
|
*
|
|
|
|
|
* implements Special:Import
|
|
|
|
|
* @ingroup SpecialPage
|
|
|
|
|
*/
|
|
|
|
|
class WikiImporter {
|
2019-09-15 13:22:08 +00:00
|
|
|
/** @var XMLReader */
|
|
|
|
|
private $reader;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/** @var array|null */
|
2014-12-10 11:24:47 +00:00
|
|
|
private $foreignNamespaces = null;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/** @var callable */
|
|
|
|
|
private $mLogItemCallback;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/** @var callable */
|
|
|
|
|
private $mUploadCallback;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/** @var callable */
|
|
|
|
|
private $mRevisionCallback;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/** @var callable */
|
|
|
|
|
private $mPageCallback;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/** @var callable|null */
|
|
|
|
|
private $mSiteInfoCallback;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/** @var callable */
|
|
|
|
|
private $mPageOutCallback;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/** @var callable|null */
|
|
|
|
|
private $mNoticeCallback;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/** @var bool|null */
|
|
|
|
|
private $mDebug;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/** @var bool|null */
|
|
|
|
|
private $mImportUploads;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/** @var string|null */
|
|
|
|
|
private $mImageBasePath;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/** @var bool */
|
2011-08-02 14:05:01 +00:00
|
|
|
private $mNoUpdates = false;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/** @var int */
|
2017-05-26 01:54:32 +00:00
|
|
|
private $pageOffset = 0;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2014-08-23 07:40:00 +00:00
|
|
|
/** @var Config */
|
|
|
|
|
private $config;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2014-12-10 11:24:47 +00:00
|
|
|
/** @var ImportTitleFactory */
|
|
|
|
|
private $importTitleFactory;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
Hooks::run() call site migration
Migrate all callers of Hooks::run() to use the new
HookContainer/HookRunner system.
General principles:
* Use DI if it is already used. We're not changing the way state is
managed in this patch.
* HookContainer is always injected, not HookRunner. HookContainer
is a service, it's a more generic interface, it is the only
thing that provides isRegistered() which is needed in some cases,
and a HookRunner can be efficiently constructed from it
(confirmed by benchmark). Because HookContainer is needed
for object construction, it is also needed by all factories.
* "Ask your friendly local base class". Big hierarchies like
SpecialPage and ApiBase have getHookContainer() and getHookRunner()
methods in the base class, and classes that extend that base class
are not expected to know or care where the base class gets its
HookContainer from.
* ProtectedHookAccessorTrait provides protected getHookContainer() and
getHookRunner() methods, getting them from the global service
container. The point of this is to ease migration to DI by ensuring
that call sites ask their local friendly base class rather than
getting a HookRunner from the service container directly.
* Private $this->hookRunner. In some smaller classes where accessor
methods did not seem warranted, there is a private HookRunner property
which is accessed directly. Very rarely (two cases), there is a
protected property, for consistency with code that conventionally
assumes protected=private, but in cases where the class might actually
be overridden, a protected accessor is preferred over a protected
property.
* The last resort: Hooks::runner(). Mostly for static, file-scope and
global code. In a few cases it was used for objects with broken
construction schemes, out of horror or laziness.
Constructors with new required arguments:
* AuthManager
* BadFileLookup
* BlockManager
* ClassicInterwikiLookup
* ContentHandlerFactory
* ContentSecurityPolicy
* DefaultOptionsManager
* DerivedPageDataUpdater
* FullSearchResultWidget
* HtmlCacheUpdater
* LanguageFactory
* LanguageNameUtils
* LinkRenderer
* LinkRendererFactory
* LocalisationCache
* MagicWordFactory
* MessageCache
* NamespaceInfo
* PageEditStash
* PageHandlerFactory
* PageUpdater
* ParserFactory
* PermissionManager
* RevisionStore
* RevisionStoreFactory
* SearchEngineConfig
* SearchEngineFactory
* SearchFormWidget
* SearchNearMatcher
* SessionBackend
* SpecialPageFactory
* UserNameUtils
* UserOptionsManager
* WatchedItemQueryService
* WatchedItemStore
Constructors with new optional arguments:
* DefaultPreferencesFactory
* Language
* LinkHolderArray
* MovePage
* Parser
* ParserCache
* PasswordReset
* Router
setHookContainer() now required after construction:
* AuthenticationProvider
* ResourceLoaderModule
* SearchEngine
Change-Id: Id442b0dbe43aba84bd5cf801d86dedc768b082c7
2020-03-19 02:42:09 +00:00
|
|
|
/** @var HookRunner */
|
|
|
|
|
private $hookRunner;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2015-02-04 07:00:36 +00:00
|
|
|
/** @var array */
|
2016-02-17 09:09:32 +00:00
|
|
|
private $countableCache = [];
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2017-01-03 14:35:34 +00:00
|
|
|
/** @var bool */
|
|
|
|
|
private $disableStatisticsUpdate = false;
|
2021-05-14 23:16:34 +00:00
|
|
|
|
2018-01-11 12:20:35 +00:00
|
|
|
/** @var ExternalUserNames */
|
|
|
|
|
private $externalUserNames;
|
2011-01-31 07:07:43 +00:00
|
|
|
|
2021-05-14 23:16:34 +00:00
|
|
|
/** @var Language */
|
|
|
|
|
private $contentLanguage;
|
|
|
|
|
|
|
|
|
|
/** @var NamespaceInfo */
|
|
|
|
|
private $namespaceInfo;
|
|
|
|
|
|
|
|
|
|
/** @var TitleFactory */
|
|
|
|
|
private $titleFactory;
|
|
|
|
|
|
|
|
|
|
/** @var WikiPageFactory */
|
|
|
|
|
private $wikiPageFactory;
|
|
|
|
|
|
|
|
|
|
/** @var UploadRevisionImporter */
|
|
|
|
|
private $uploadRevisionImporter;
|
|
|
|
|
|
|
|
|
|
/** @var PermissionManager */
|
|
|
|
|
private $permissionManager;
|
|
|
|
|
|
|
|
|
|
/** @var IContentHandlerFactory */
|
|
|
|
|
private $contentHandlerFactory;
|
|
|
|
|
|
|
|
|
|
/** @var SlotRoleRegistry */
|
|
|
|
|
private $slotRoleRegistry;
|
|
|
|
|
|
2011-01-31 07:07:43 +00:00
|
|
|
/**
 * Creates a WikiImporter that reads its XML from the given import source.
 *
 * All service arguments are optional. This class is used by several extensions,
 * so any service that is not passed in is fetched from global state instead.
 *
 * @param ImportSource $source Source registered with UploadSourceAdapter and opened by the reader
 * @param Config $config
 * @param HookContainer|null $hookContainer
 * @param Language|null $contentLanguage
 * @param NamespaceInfo|null $namespaceInfo
 * @param TitleFactory|null $titleFactory
 * @param WikiPageFactory|null $wikiPageFactory
 * @param UploadRevisionImporter|null $uploadRevisionImporter
 * @param PermissionManager|null $permissionManager
 * @param IContentHandlerFactory|null $contentHandlerFactory
 * @param SlotRoleRegistry|null $slotRoleRegistry
 * @throws MWException If the XML reader cannot be opened on the source
 */
public function __construct(
	ImportSource $source,
	Config $config,
	HookContainer $hookContainer = null,
	Language $contentLanguage = null,
	NamespaceInfo $namespaceInfo = null,
	TitleFactory $titleFactory = null,
	WikiPageFactory $wikiPageFactory = null,
	UploadRevisionImporter $uploadRevisionImporter = null,
	PermissionManager $permissionManager = null,
	IContentHandlerFactory $contentHandlerFactory = null,
	SlotRoleRegistry $slotRoleRegistry = null
) {
	$this->reader = new XMLReader();
	$this->config = $config;

	/**
	 * This class is used by several extensions, thus a fallback to global state
	 * is provided here. The service container is only touched for arguments
	 * that were actually omitted.
	 */
	$this->hookRunner = $hookContainer ? new HookRunner( $hookContainer ) : Hooks::runner();
	$this->contentLanguage = $contentLanguage
		?? MediaWikiServices::getInstance()->getContentLanguage();
	$this->namespaceInfo = $namespaceInfo
		?? MediaWikiServices::getInstance()->getNamespaceInfo();
	$this->titleFactory = $titleFactory
		?? MediaWikiServices::getInstance()->getTitleFactory();
	$this->wikiPageFactory = $wikiPageFactory
		?? MediaWikiServices::getInstance()->getWikiPageFactory();
	$this->uploadRevisionImporter = $uploadRevisionImporter
		?? MediaWikiServices::getInstance()->getWikiRevisionUploadImporter();
	$this->permissionManager = $permissionManager
		?? MediaWikiServices::getInstance()->getPermissionManager();
	$this->contentHandlerFactory = $contentHandlerFactory
		?? MediaWikiServices::getInstance()->getContentHandlerFactory();
	$this->slotRoleRegistry = $slotRoleRegistry
		?? MediaWikiServices::getInstance()->getSlotRoleRegistry();

	// Register the stream wrapper once per process; strict comparison avoids loose matches
	if ( !in_array( 'uploadsource', stream_get_wrappers(), true ) ) {
		stream_wrapper_register( 'uploadsource', UploadSourceAdapter::class );
	}
	$id = UploadSourceAdapter::registerSource( $source );

	// Enable the entity loader, as it is needed for loading external URLs via
	// XMLReader::open (T86036)
	$oldDisable = libxml_disable_entity_loader( false );
	try {
		// 0 is the documented default for the flags argument of XMLReader::open()
		$options = defined( 'LIBXML_PARSEHUGE' ) ? LIBXML_PARSEHUGE : 0;
		$status = $this->reader->open( "uploadsource://$id", null, $options );
		if ( !$status ) {
			// libxml_get_last_error() returns false when the error buffer is empty
			$error = libxml_get_last_error();
			$detail = $error ? $error->message : 'unknown libxml error';
			throw new MWException(
				'Encountered an internal error while initializing WikiImporter object: ' . $detail
			);
		}
	} finally {
		// Always restore the previous entity loader state, on success and on failure
		libxml_disable_entity_loader( $oldDisable );
	}

	// Default callbacks
	$this->setPageCallback( [ $this, 'beforeImportPage' ] );
	$this->setRevisionCallback( [ $this, 'importRevision' ] );
	$this->setUploadCallback( [ $this, 'importUpload' ] );
	$this->setLogItemCallback( [ $this, 'importLogItem' ] );
	$this->setPageOutCallback( [ $this, 'finishImportPage' ] );

	$this->importTitleFactory = new NaiveImportTitleFactory(
		$this->contentLanguage,
		$this->namespaceInfo,
		$this->titleFactory
	);
	$this->externalUserNames = new ExternalUserNames( 'imported', false );
}
|
|
|
|
|
|
2014-05-13 12:22:27 +00:00
|
|
|
/**
 * Returns the XMLReader instance this importer reads from.
 *
 * @return XMLReader|null
 */
public function getReader() {
	return $this->reader;
}
|
|
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/**
 * Reports an XML error to the debug log.
 *
 * Despite its name, this method does not throw: the message is passed to
 * debug() and then written with wfDebug().
 *
 * @param string $err
 */
public function throwXmlError( $err ) {
	$failure = "FAILURE: $err";
	$this->debug( $failure );

	$logLine = "WikiImporter XML error: $err";
	wfDebug( $logLine );
}
|
|
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/**
 * Writes a message to the debug log, but only while debug mode is enabled
 * (see setDebug()).
 *
 * @param string $data
 */
public function debug( $data ) {
	if ( !$this->mDebug ) {
		return;
	}

	wfDebug( "IMPORT: $data" );
}
|
|
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/**
 * Writes a warning to the debug log, regardless of the debug mode setting.
 *
 * @param string $data
 */
public function warn( $data ) {
	$line = "IMPORT: $data";
	wfDebug( $line );
}
|
|
|
|
|
|
2020-11-21 01:44:41 +00:00
|
|
|
/**
 * Emits a notice. If a callable notice callback has been set it receives the
 * message key and the parameters as one array; otherwise the rendered message
 * text goes to the debug log.
 *
 * @param string $msg Message key
 * @param mixed ...$params Message parameters
 */
public function notice( $msg, ...$params ) {
	if ( !is_callable( $this->mNoticeCallback ) ) {
		# No ImportReporter -> CLI
		// T177997: the command line importers should call setNoticeCallback()
		// for their own custom callback to echo the notice
		wfDebug( wfMessage( $msg, $params )->text() );
		return;
	}

	call_user_func( $this->mNoticeCallback, $msg, $params );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Turns debug mode on or off. While it is on, debug() writes to the debug log.
 *
 * @param bool $debug
 */
public function setDebug( $debug ) {
	$this->mDebug = $debug;
}
|
2011-10-26 03:44:47 +00:00
|
|
|
|
2011-08-02 14:05:01 +00:00
|
|
|
/**
 * Turns 'no updates' mode on or off. In this mode, the link tables will not be
 * updated by the importer.
 *
 * @param bool $noupdates
 */
public function setNoUpdates( $noupdates ) {
	$this->mNoUpdates = $noupdates;
}
|
2011-01-31 07:07:43 +00:00
|
|
|
|
2017-05-26 01:54:32 +00:00
|
|
|
/**
 * Sets the 'pageOffset' value: the importer skips the first n-1 pages and
 * starts from the nth page (1-based indexing).
 *
 * @param int $nthPage
 * @since 1.29
 */
public function setPageOffset( $nthPage ) {
	$this->pageOffset = $nthPage;
}
|
|
|
|
|
|
2012-01-06 14:21:16 +00:00
|
|
|
/**
 * Sets the callback used by notice() to display notice messages.
 *
 * NOTE(review): the assignment is delegated to wfSetVar(), which is defined
 * outside this file; presumably it leaves the current callback untouched when
 * $callback is null - confirm before relying on that.
 *
 * @param callable $callback
 * @return callable|null Whatever wfSetVar() returns, presumably the previous callback
 */
public function setNoticeCallback( $callback ) {
	$previous = wfSetVar( $this->mNoticeCallback, $callback );

	return $previous;
}
|
|
|
|
|
|
2011-01-31 07:07:43 +00:00
|
|
|
/**
 * Sets the action to perform as each new page in the stream is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was set before this call
 */
public function setPageCallback( $callback ) {
	$old = $this->mPageCallback;
	$this->mPageCallback = $callback;

	return $old;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Sets the action to perform as each page in the stream is completed.
 *
 * The callback accepts the page title (as a Title object), a second object
 * with the original title form (in case it's been overridden into a local
 * namespace), and a count of revisions.
 *
 * @param callable $callback
 * @return callable|null The callback that was set before this call
 */
public function setPageOutCallback( $callback ) {
	$old = $this->mPageOutCallback;
	$this->mPageOutCallback = $callback;

	return $old;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Sets the action to perform as each page revision is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was set before this call
 */
public function setRevisionCallback( $callback ) {
	$old = $this->mRevisionCallback;
	$this->mRevisionCallback = $callback;

	return $old;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Sets the action to perform as each file upload version is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was set before this call
 */
public function setUploadCallback( $callback ) {
	$old = $this->mUploadCallback;
	$this->mUploadCallback = $callback;

	return $old;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Sets the action to perform as each log item is reached.
 *
 * @param callable $callback
 * @return callable|null The callback that was set before this call
 */
public function setLogItemCallback( $callback ) {
	$old = $this->mLogItemCallback;
	$this->mLogItemCallback = $callback;

	return $old;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Sets the action to perform when site info is encountered.
 *
 * @param callable $callback
 * @return callable|null The callback that was set before this call
 */
public function setSiteInfoCallback( $callback ) {
	$old = $this->mSiteInfoCallback;
	$this->mSiteInfoCallback = $callback;

	return $old;
}
|
|
|
|
|
|
2014-12-10 11:24:47 +00:00
|
|
|
/**
 * Sets the factory object used to convert ForeignTitle objects into local
 * Title objects.
 *
 * @param ImportTitleFactory $factory
 */
public function setImportTitleFactory( $factory ) {
	$this->importTitleFactory = $factory;
}
|
|
|
|
|
|
2011-01-31 07:07:43 +00:00
|
|
|
/**
 * Sets a target namespace to override the defaults.
 *
 * Passing null installs a NaiveImportTitleFactory, i.e. namespaces are not
 * overridden. A non-negative namespace known to NamespaceInfo installs a
 * NamespaceImportTitleFactory for it. Any other value leaves the current
 * title factory unchanged.
 *
 * @param null|int $namespace
 * @return bool True if a title factory was installed, false if $namespace was rejected
 */
public function setTargetNamespace( $namespace ) {
	if ( $namespace === null ) {
		// Don't override namespaces
		$naiveFactory = new NaiveImportTitleFactory(
			$this->contentLanguage,
			$this->namespaceInfo,
			$this->titleFactory
		);
		$this->setImportTitleFactory( $naiveFactory );

		return true;
	}

	$isKnownNamespace = $namespace >= 0 &&
		$this->namespaceInfo->exists( intval( $namespace ) );
	if ( !$isKnownNamespace ) {
		return false;
	}

	$namespaceFactory = new NamespaceImportTitleFactory(
		$this->namespaceInfo,
		$this->titleFactory,
		intval( $namespace )
	);
	$this->setImportTitleFactory( $namespaceFactory );

	return true;
}
|
2011-10-26 03:44:47 +00:00
|
|
|
|
2012-08-20 14:55:28 +00:00
|
|
|
/**
 * Sets a target root page under which all pages are imported.
 *
 * Null installs a NaiveImportTitleFactory (no root page). An empty string
 * changes nothing. Any other value has trailing slashes removed and is parsed
 * as a title, which must be a local title in a namespace that has subpages;
 * otherwise a fatal error is added to the returned status.
 *
 * @param null|string $rootpage
 * @return Status Good unless the root page was rejected
 */
public function setTargetRootPage( $rootpage ) {
	$status = Status::newGood();
	$nsInfo = $this->namespaceInfo;

	if ( $rootpage === null ) {
		// No rootpage
		$this->setImportTitleFactory(
			new NaiveImportTitleFactory( $this->contentLanguage, $nsInfo, $this->titleFactory )
		);

		return $status;
	}

	if ( $rootpage === '' ) {
		return $status;
	}

	// avoid double slashes
	$rootpage = rtrim( $rootpage, '/' );
	$title = Title::newFromText( $rootpage );

	if ( !$title || $title->isExternal() ) {
		$status->fatal( 'import-rootpage-invalid' );

		return $status;
	}

	$rootNamespace = $title->getNamespace();
	if ( !$nsInfo->hasSubpages( $rootNamespace ) ) {
		if ( $rootNamespace === NS_MAIN ) {
			$displayNSText = wfMessage( 'blanknamespace' )->text();
		} else {
			$displayNSText = $this->contentLanguage->getNsText( $rootNamespace );
		}
		$status->fatal( 'import-rootpage-nosubpage', $displayNSText );

		return $status;
	}

	// set namespace to 'all', so the namespace check in processTitle() can pass
	$this->setTargetNamespace( null );
	$this->setImportTitleFactory(
		new SubpageImportTitleFactory( $nsInfo, $this->titleFactory, $title )
	);

	return $status;
}
|
|
|
|
|
|
2011-04-12 19:25:56 +00:00
|
|
|
/**
 * Stores the image base path.
 *
 * @param string $dir
 */
public function setImageBasePath( $dir ) {
	$this->mImageBasePath = $dir;
}
|
2011-10-26 03:44:47 +00:00
|
|
|
|
|
|
|
|
/**
 * Stores the 'import uploads' flag.
 *
 * @param bool $import
 */
public function setImportUploads( $import ) {
	$this->mImportUploads = $import;
}
|
2011-01-31 07:07:43 +00:00
|
|
|
|
2017-10-25 19:26:53 +00:00
|
|
|
/**
 * Replaces the ExternalUserNames helper with one built from the given settings.
 *
 * @since 1.31
 * @param string $usernamePrefix Prefix to apply to unknown (and possibly also known) usernames
 * @param bool $assignKnownUsers Whether to apply the prefix to usernames that exist locally
 */
public function setUsernamePrefix( $usernamePrefix, $assignKnownUsers ) {
	$userNames = new ExternalUserNames( $usernamePrefix, $assignKnownUsers );
	$this->externalUserNames = $userNames;
}
|
|
|
|
|
|
2017-01-03 14:35:34 +00:00
|
|
|
/**
 * Switches off the statistics update, which can take a lot of time.
 * This class offers no method to switch it back on.
 *
 * @since 1.29
 */
public function disableStatisticsUpdate() {
	$this->disableStatisticsUpdate = true;
}
|
|
|
|
|
|
2015-02-04 07:00:36 +00:00
|
|
|
/**
 * Default per-page callback. Records, keyed by the prefixed title text,
 * whether the page is currently countable, for later use in site statistics.
 *
 * @param array $titleAndForeignTitle Two-element array, with Title object at
 *   index 0 and ForeignTitle object at index 1
 * @return bool Always true
 */
public function beforeImportPage( $titleAndForeignTitle ) {
	$title = $titleAndForeignTitle[0];
	$wikiPage = $this->wikiPageFactory->newFromTitle( $title );

	$cacheKey = 'title_' . $title->getPrefixedText();
	$this->countableCache[$cacheKey] = $wikiPage->isCountable();

	return true;
}
|
|
|
|
|
|
2011-01-31 07:07:43 +00:00
|
|
|
/**
 * Default per-revision callback, performs the import.
 *
 * The revision is rejected with a notice if its content handler cannot be used
 * on the target title, or if importing it raises a
 * MWContentSerializationException.
 *
 * @param WikiRevision $revision
 * @return bool The result of importOldRevision(), or false if the revision was rejected
 */
public function importRevision( $revision ) {
	if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
		$errorKey = 'import-error-bad-location';
	} else {
		try {
			return $revision->importOldRevision();
		} catch ( MWContentSerializationException $ex ) {
			$errorKey = 'import-error-unserialize';
		}
	}

	// Both failure modes report the same revision details, only the message key differs
	$this->notice(
		$errorKey,
		$revision->getTitle()->getPrefixedText(),
		$revision->getID(),
		$revision->getModel(),
		$revision->getFormat()
	);

	return false;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Default per-log-item callback, performs the import by delegating to the
 * item's own importLogItem() method.
 *
 * @param WikiRevision $revision
 * @return bool Result of $revision->importLogItem()
 */
public function importLogItem( $revision ) {
	$imported = $revision->importLogItem();

	return $imported;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Import an upload by handing it to the injected upload revision importer.
 *
 * @param WikiRevision $revision
 * @return bool Result of isGood() on the status returned by the importer
 */
public function importUpload( $revision ) {
	return $this->uploadRevisionImporter->import( $revision )->isGood();
}
|
|
|
|
|
|
|
|
|
|
/**
 * Finish the import of one page: adjust the article count if the page's
 * countable state changed, then run the AfterImportPage hook.
 *
 * Mostly for hook use.
 *
 * @param PageIdentity $pageIdentity
 * @param ForeignTitle $foreignTitle
 * @param int $revCount
 * @param int $sRevCount
 * @param array $pageInfo
 * @return bool Result of the AfterImportPage hook runner
 */
public function finishImportPage( PageIdentity $pageIdentity, $foreignTitle, $revCount,
	$sRevCount, $pageInfo
) {
	// Update article count statistics (T42009)
	// WikiPage->doEditUpdates() counts with one-revision-at-a-time editing in
	// mind, not bulk imports, and suffers from replica DB lag in this
	// situation. WikiPage is left to handle the total page and revision
	// count; the article (content page) count is handled by the custom
	// logic below.
	if ( !$this->disableStatisticsUpdate ) {
		$wikiPage = $this->wikiPageFactory->newFromTitle( $pageIdentity );
		$wikiPage->loadPageData( 'fromdbmaster' );
		$currentContent = $wikiPage->getContent();

		if ( $currentContent !== null ) {
			// No user is available here, so use the main request context's user
			$performer = RequestContext::getMain()->getUser();
			$prepared = $wikiPage->prepareContentForEdit( $currentContent, null, $performer );
			$cacheKey = 'title_' . CacheKeyHelper::getKeyForPage( $pageIdentity );
			$isCountable = $wikiPage->isCountable( $prepared );

			// Only adjust when a pre-import state was recorded and it differs.
			if ( array_key_exists( $cacheKey, $this->countableCache ) ) {
				$wasCountable = $this->countableCache[$cacheKey];
				if ( $isCountable != $wasCountable ) {
					$delta = (int)$isCountable - (int)$wasCountable;
					DeferredUpdates::addUpdate(
						SiteStatsUpdate::factory( [ 'articles' => $delta ] )
					);
				}
			}
		} else {
			wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $pageIdentity .
				' because WikiPage::getContent() returned null' );
		}
	}

	return $this->hookRunner->onAfterImportPage(
		Title::castFromPageIdentity( $pageIdentity ),
		$foreignTitle,
		$revCount,
		$sRevCount,
		$pageInfo
	);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Alternate per-revision callback for debugging: writes the revision's
 * title, user, timestamp, comment and text to the debug output.
 *
 * @param WikiRevision &$revision
 */
public function debugRevisionHandler( &$revision ) {
	$this->debug( "Got revision:" );

	// The title is only printed when it is an object.
	$titleLine = is_object( $revision->title )
		? "-- Title: " . $revision->title->getPrefixedText()
		: "-- Title: <invalid>";
	$this->debug( $titleLine );

	$this->debug( "-- User: " . $revision->user_text );
	$this->debug( "-- Timestamp: " . $revision->timestamp );
	$this->debug( "-- Comment: " . $revision->comment );
	$this->debug( "-- Text: " . $revision->text );
}
|
|
|
|
|
|
2014-12-10 11:24:47 +00:00
|
|
|
/**
 * Pass the collected site info to the registered site-info callback.
 *
 * @param array $siteInfo
 * @return mixed|false The callback's return value, or false when no callback is set
 */
private function siteInfoCallback( $siteInfo ) {
	if ( !isset( $this->mSiteInfoCallback ) ) {
		return false;
	}

	// The callback receives the site info followed by this importer.
	$callbackArgs = [ $siteInfo, $this ];

	return call_user_func_array( $this->mSiteInfoCallback, $callbackArgs );
}
|
|
|
|
|
|
2011-01-31 07:07:43 +00:00
|
|
|
/**
 * Tell the registered page callback that a new "<page>" has been reached.
 * Does nothing when no page callback is set.
 *
 * @param array $title
 */
public function pageCallback( $title ) {
	if ( !isset( $this->mPageCallback ) ) {
		return;
	}

	call_user_func( $this->mPageCallback, $title );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Tell the registered page-out callback that a "</page>" has been closed.
 * Does nothing when no page-out callback is set.
 *
 * @param PageIdentity $pageIdentity
 * @param ForeignTitle $foreignTitle
 * @param int $revCount
 * @param int $sucCount Number of revisions for which callback returned true
 * @param array $pageInfo Associative array of page information
 */
private function pageOutCallback( PageIdentity $pageIdentity, $foreignTitle, $revCount,
	$sucCount, $pageInfo
) {
	if ( !isset( $this->mPageOutCallback ) ) {
		return;
	}

	// Forward every argument this method received, unchanged and in order.
	$forwardedArgs = func_get_args();
	call_user_func_array( $this->mPageOutCallback, $forwardedArgs );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Pass a revision to the registered revision callback.
 *
 * @param WikiRevision $revision
 * @return bool|mixed The callback's return value, or false when no callback is set
 */
private function revisionCallback( $revision ) {
	if ( !isset( $this->mRevisionCallback ) ) {
		return false;
	}

	// The callback receives the revision followed by this importer.
	$callbackArgs = [ $revision, $this ];

	return call_user_func_array( $this->mRevisionCallback, $callbackArgs );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Pass a new log item to the registered log-item callback.
 *
 * @param WikiRevision $revision
 * @return mixed|false The callback's return value, or false when no callback is set
 */
private function logItemCallback( $revision ) {
	if ( !isset( $this->mLogItemCallback ) ) {
		return false;
	}

	// The callback receives the log item followed by this importer.
	$callbackArgs = [ $revision, $this ];

	return call_user_func_array( $this->mLogItemCallback, $callbackArgs );
}
|
|
|
|
|
|
2014-05-19 12:23:32 +00:00
|
|
|
/**
 * Retrieves the contents of the named attribute of the current element.
 *
 * @param string $attr The name of the attribute
 * @return string The value of the attribute or an empty string if it is not set in the current
 *  element.
 */
public function nodeAttribute( $attr ) {
	// XMLReader::getAttribute() yields null for a missing attribute; coalesce
	// so that the documented "empty string" contract actually holds.
	return $this->reader->getAttribute( $attr ) ?? '';
}
|
|
|
|
|
|
2011-01-31 07:07:43 +00:00
|
|
|
/**
 * Collect the text contents of the current element.
 *
 * Shouldn't something like this be built-in to XMLReader?
 * Text, CDATA and significant-whitespace nodes are concatenated until the
 * first end-of-element node is read; the element is assumed to have no
 * sub-elements or such scary things.
 *
 * @return string The collected text; an empty string for an empty element
 *  or when the reader runs out of nodes before an end element is seen
 * @internal
 */
public function nodeContents() {
	if ( $this->reader->isEmptyElement ) {
		return "";
	}

	$textNodeTypes = [
		XMLReader::TEXT,
		XMLReader::CDATA,
		XMLReader::SIGNIFICANT_WHITESPACE,
	];
	$collected = "";

	while ( $this->reader->read() ) {
		$nodeType = $this->reader->nodeType;

		if ( $nodeType === XMLReader::END_ELEMENT ) {
			return $collected;
		}
		if ( in_array( $nodeType, $textNodeTypes, true ) ) {
			$collected .= $this->reader->value;
		}
	}

	// read() failed before any end element: close the reader and discard
	// whatever text was gathered so far.
	$this->reader->close();
	return '';
}
|
|
|
|
|
|
|
|
|
|
/**
 * Primary entry point: reads the dump and dispatches each top-level
 * element (siteinfo, page, logitem) to its handler.
 *
 * @throws Exception
 * @throws MWException If the first node read is not a <mediawiki> element
 * @return bool True once the processing loop has finished without an exception
 */
public function doImport() {
	// Calls to reader->read need to be wrapped in calls to
	// libxml_disable_entity_loader() to avoid local file
	// inclusion attacks (T48932).
	$oldDisable = libxml_disable_entity_loader( true );
	try {
		$this->reader->read();

		if ( $this->reader->localName != 'mediawiki' ) {
			// No explicit restore here: the finally block below resets the
			// entity loader state and closes the reader on every exit path.
			throw new MWException( "Expected <mediawiki> tag, got " .
				$this->reader->localName );
		}
		$this->debug( "<mediawiki> tag is correct." );

		$this->debug( "Starting primary dump processing loop." );

		$keepReading = $this->reader->read();
		$skip = false;
		$pageCount = 0;
		while ( $keepReading ) {
			$tag = $this->reader->localName;
			if ( $this->pageOffset ) {
				if ( $tag === 'page' ) {
					$pageCount++;
				}
				// Jump over whole nodes until the page counter reaches the offset.
				if ( $pageCount < $this->pageOffset ) {
					$keepReading = $this->reader->next();
					continue;
				}
			}
			$type = $this->reader->nodeType;

			if ( !$this->hookRunner->onImportHandleToplevelXMLTag( $this ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				// next() moves past the unhandled node without descending into it.
				$keepReading = $this->reader->next();
				$skip = false;
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}
	} finally {
		libxml_disable_entity_loader( $oldDisable );
		$this->reader->close();
	}

	return true;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Read the <siteinfo> element: record foreign namespaces, collect the
 * simple fields and pass the result to the site-info callback.
 */
private function handleSiteInfo() {
	$this->debug( "Enter site info handler." );
	$siteInfo = [];

	// Fields that can just be stuffed in the siteInfo object
	$normalFields = [ 'sitename', 'base', 'generator', 'case' ];

	while ( $this->reader->read() ) {
		$atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'siteinfo';
		if ( $atClosingTag ) {
			break;
		}

		$tag = $this->reader->localName;

		if ( $tag == 'namespace' ) {
			// Read the attribute first: nodeContents() advances the reader.
			$namespaceKey = $this->nodeAttribute( 'key' );
			$this->foreignNamespaces[$namespaceKey] = $this->nodeContents();
		} elseif ( in_array( $tag, $normalFields ) ) {
			$siteInfo[$tag] = $this->nodeContents();
		}
	}

	$siteInfo['_namespaces'] = $this->foreignNamespaces;
	$this->siteInfoCallback( $siteInfo );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Read a <logitem> element into an array of log fields and hand that
 * array to processLogItem().
 */
private function handleLogItem() {
	$this->debug( "Enter log item handler." );
	$logInfo = [];

	// Fields that can just be stuffed in the logInfo array
	$normalFields = [
		'id', 'comment', 'type', 'action', 'timestamp', 'logtitle', 'params',
	];

	while ( $this->reader->read() ) {
		$atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'logitem';
		if ( $atClosingTag ) {
			break;
		}

		$tag = $this->reader->localName;

		if ( !$this->hookRunner->onImportHandleLogItemXMLTag( $this, $logInfo ) ) {
			// The hook returned false: nothing more to do for this node
			continue;
		}

		if ( in_array( $tag, $normalFields ) ) {
			$logInfo[$tag] = $this->nodeContents();
		} elseif ( $tag == 'contributor' ) {
			$logInfo['contributor'] = $this->handleContributor();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled log-item XML tag $tag" );
		}
	}

	$this->processLogItem( $logInfo );
}
|
|
|
|
|
|
2011-10-26 03:44:47 +00:00
|
|
|
/**
 * Build a WikiRevision from parsed log item fields and pass it to the
 * log-item callback.
 *
 * 'type' and 'action' are read unconditionally; every other field is only
 * applied when present.
 *
 * @param array $logInfo
 * @return mixed|false Result of logItemCallback()
 */
private function processLogItem( $logInfo ) {
	$logRevision = new WikiRevision( $this->config );

	if ( isset( $logInfo['id'] ) ) {
		$logRevision->setID( $logInfo['id'] );
	}
	$logRevision->setType( $logInfo['type'] );
	$logRevision->setAction( $logInfo['action'] );
	if ( isset( $logInfo['timestamp'] ) ) {
		$logRevision->setTimestamp( $logInfo['timestamp'] );
	}
	if ( isset( $logInfo['params'] ) ) {
		$logRevision->setParams( $logInfo['params'] );
	}
	if ( isset( $logInfo['logtitle'] ) ) {
		// @todo Using Title for non-local titles is a recipe for disaster.
		// We should use ForeignTitle here instead.
		$logTitle = Title::newFromText( $logInfo['logtitle'] );
		$logRevision->setTitle( $logTitle );
	}

	$logRevision->setNoUpdates( $this->mNoUpdates );

	if ( isset( $logInfo['comment'] ) ) {
		$logRevision->setComment( $logInfo['comment'] );
	}

	if ( isset( $logInfo['contributor']['ip'] ) ) {
		$logRevision->setUserIP( $logInfo['contributor']['ip'] );
	}

	// Without a contributor name, fall back to a prefixed placeholder user.
	if ( isset( $logInfo['contributor']['username'] ) ) {
		$username = $this->externalUserNames->applyPrefix( $logInfo['contributor']['username'] );
	} else {
		$username = $this->externalUserNames->addPrefix( 'Unknown user' );
	}
	$logRevision->setUsername( $username );

	return $this->logItemCallback( $logRevision );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Read a <page> element: collect its simple fields, resolve the title when
 * the first revision or upload is reached, dispatch each revision/upload to
 * its handler and finally fire the page-out callback.
 */
private function handlePage() {
	// Handle page data.
	$this->debug( "Enter page handler." );
	$pageInfo = [ 'revisionCount' => 0, 'successfulRevisionCount' => 0 ];

	// Fields that can just be stuffed in the pageInfo object
	$normalFields = [ 'title', 'ns', 'id', 'redirect', 'restrictions' ];

	$skipNode = false;
	$titleIsBad = false;

	// next() moves past the current node without descending into it, read() descends.
	while ( $skipNode ? $this->reader->next() : $this->reader->read() ) {
		$atClosingTag = $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'page';
		if ( $atClosingTag ) {
			break;
		}

		$skipNode = false;
		$tag = $this->reader->localName;

		if ( $titleIsBad ) {
			// The title is invalid, bail out of this page
			$skipNode = true;
			continue;
		}

		if ( !$this->hookRunner->onImportHandlePageXMLTag( $this, $pageInfo ) ) {
			// The hook returned false: nothing more to do for this node
			continue;
		}

		if ( in_array( $tag, $normalFields ) ) {
			// An XML snippet:
			// <page>
			//     <id>123</id>
			//     <title>Page</title>
			//     <redirect title="NewTitle"/>
			//     ...
			// Because the redirect tag is built differently, we need special handling for that case.
			$pageInfo[$tag] = $tag == 'redirect'
				? $this->nodeAttribute( 'title' )
				: $this->nodeContents();
			continue;
		}

		if ( $tag == 'revision' || $tag == 'upload' ) {
			if ( !isset( $title ) ) {
				$title = $this->processTitle( $pageInfo['title'], $pageInfo['ns'] ?? null );

				// $title is either an array of two titles or false.
				if ( is_array( $title ) ) {
					$this->pageCallback( $title );
					$pageInfo['_title'] = $title[0];
					$foreignTitle = $title[1];
				} else {
					$titleIsBad = true;
					$skipNode = true;
				}
			}

			if ( $title ) {
				if ( $tag == 'revision' ) {
					$this->handleRevision( $pageInfo );
				} else {
					$this->handleUpload( $pageInfo );
				}
			}
			continue;
		}

		if ( $tag != '#text' ) {
			$this->warn( "Unhandled page XML tag $tag" );
			$skipNode = true;
		}
	}

	// @note $pageInfo['_title'] is only set when a valid title was processed
	// above; in that case pageCallback has already been called and
	// $foreignTitle is set as well, since both come from the same title pair.
	// Only then is pageOutCallback fired.
	if ( array_key_exists( '_title', $pageInfo ) ) {
		/** @var Title $localTitle */
		$localTitle = $pageInfo['_title'];
		$this->pageOutCallback(
			$localTitle,
			$foreignTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo
		);
	}
}
|
|
|
|
|
|
2011-10-26 03:44:47 +00:00
|
|
|
/**
 * Read the child nodes of the current <revision> element, gather them into
 * a revision record and pass that record to processRevision().
 *
 * $pageInfo['revisionCount'] is incremented for every revision handled;
 * $pageInfo['successfulRevisionCount'] is incremented only when
 * processRevision() returns a truthy value.
 *
 * @param array &$pageInfo
 */
private function handleRevision( &$pageInfo ) {
	$this->debug( "Enter revision handler" );

	// Child tags whose node contents are stored as-is under the tag name.
	$simpleTags = [
		'id', 'parentid', 'timestamp', 'comment', 'minor', 'origin',
		'model', 'format', 'text', 'sha1',
	];
	$revisionInfo = [];

	// Once an unhandled tag has been seen, advance with next() so that its
	// subtree is stepped over instead of being descended into.
	$skipSubtree = false;

	while ( $skipSubtree ? $this->reader->next() : $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $tag === 'revision' && $this->reader->nodeType === XMLReader::END_ELEMENT ) {
			break;
		}

		// A hook handler returning false means this node needs no further
		// processing here.
		if ( !$this->hookRunner->onImportHandleRevisionXMLTag( $this, $pageInfo, $revisionInfo ) ) {
			continue;
		}

		if ( in_array( $tag, $simpleTags, true ) ) {
			$revisionInfo[$tag] = $this->nodeContents();
			continue;
		}

		if ( $tag === 'content' ) {
			// We can have multiple content tags, so make this an array.
			$revisionInfo['content'][] = $this->handleContent();
			continue;
		}

		if ( $tag === 'contributor' ) {
			$revisionInfo['contributor'] = $this->handleContributor();
			continue;
		}

		if ( $tag !== '#text' ) {
			$this->warn( "Unhandled revision XML tag $tag" );
			$skipSubtree = true;
		}
	}

	$pageInfo['revisionCount']++;

	$imported = $this->processRevision( $pageInfo, $revisionInfo );
	if ( $imported ) {
		$pageInfo['successfulRevisionCount']++;
	}
}
|
|
|
|
|
|
2020-04-06 09:16:17 +00:00
|
|
|
/**
 * Read the child nodes of the current <content> element and collect the
 * recognised ones into an array keyed by tag name.
 *
 * A self-closing <content/> element yields an empty array. XMLReader
 * reports no END_ELEMENT node for an empty element, so without this check
 * the loop below would read past the element and swallow following nodes
 * while searching for a closing "content" tag.
 *
 * @return array Values for any of 'role', 'origin', 'model', 'format' and
 *   'text' that were present
 */
private function handleContent() {
	$this->debug( "Enter content handler" );

	if ( $this->reader->isEmptyElement ) {
		return [];
	}

	$contentInfo = [];

	// Child tags whose node contents are stored as-is under the tag name.
	$normalFields = [ 'role', 'origin', 'model', 'format', 'text' ];

	// Once an unhandled tag has been seen, advance with next() so that its
	// subtree is stepped over instead of being descended into.
	$skip = false;

	while ( $skip ? $this->reader->next() : $this->reader->read() ) {
		if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
			$this->reader->localName == 'content' ) {
			break;
		}

		$tag = $this->reader->localName;

		if ( !$this->hookRunner->onImportHandleContentXMLTag(
			$this, $contentInfo )
		) {
			// Do nothing
		} elseif ( in_array( $tag, $normalFields ) ) {
			$contentInfo[$tag] = $this->nodeContents();
		} elseif ( $tag != '#text' ) {
			$this->warn( "Unhandled content XML tag $tag" );
			$skip = true;
		}
	}

	return $contentInfo;
}
|
|
|
|
|
|
2011-10-26 03:44:47 +00:00
|
|
|
/**
 * Build a Content object from the 'text' (and optional 'model' / 'role')
 * entries of an imported content record.
 *
 * @param Title $title Used to pick a default content model when
 *   $contentInfo has no 'model' entry
 * @param int $revisionId Only used in the size error message; a falsy
 *   value produces a generic message
 * @param array $contentInfo
 *
 * @return Content
 * @throws MWException If 'text' is missing, or if the text is larger than
 *   $wgMaxArticleSize permits for one of the size-checked models
 */
private function makeContent( Title $title, $revisionId, $contentInfo ) {
	global $wgMaxArticleSize;

	if ( !isset( $contentInfo['text'] ) ) {
		throw new MWException( 'Missing text field in import.' );
	}

	// Make sure revisions won't violate $wgMaxArticleSize, which could lead to
	// database errors and instability. Only the listed content models (and a
	// missing model) are tested, as other content models might use
	// serialization formats which aren't checked against $wgMaxArticleSize.
	$sizeCheckedModels = [ 'wikitext', 'css', 'json', 'javascript', 'text', '' ];
	$isSizeChecked = !isset( $contentInfo['model'] )
		|| in_array( $contentInfo['model'], $sizeCheckedModels );

	if ( $isSizeChecked && strlen( $contentInfo['text'] ) > $wgMaxArticleSize * 1024 ) {
		$subject = $revisionId
			? "the revision with ID $revisionId"
			: 'a revision';

		throw new MWException(
			'The text of ' . $subject .
			" exceeds the maximum allowable size ($wgMaxArticleSize KiB)"
		);
	}

	$role = $contentInfo['role'] ?? SlotRecord::MAIN;
	if ( isset( $contentInfo['model'] ) ) {
		$model = $contentInfo['model'];
	} else {
		$model = $this->getDefaultContentModel( $title, $role );
	}

	$handler = $this->getContentHandler( $model );

	return $handler->unserializeContent(
		$handler->importTransform( $contentInfo['text'] )
	);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Turn a parsed revision record into a WikiRevision and hand it to
 * revisionCallback().
 *
 * The main slot is built from the top-level fields of $revisionInfo; every
 * entry under $revisionInfo['content'] becomes an additional slot and must
 * name its role.
 *
 * @param array $pageInfo Must contain the target title under '_title'
 * @param array $revisionInfo
 * @throws MWException If an entry in $revisionInfo['content'] has no role,
 *   or if makeContent() rejects the content
 * @return mixed|false Whatever revisionCallback() returns
 */
private function processRevision( $pageInfo, $revisionInfo ) {
	$revision = new WikiRevision( $this->config );

	$revisionId = $revisionInfo['id'] ?? 0;
	if ( $revisionId ) {
		$revision->setID( $revisionId );
	}

	$title = $pageInfo['_title'];
	$revision->setTitle( $title );

	$revision->setContent(
		SlotRecord::MAIN,
		$this->makeContent( $title, $revisionId, $revisionInfo )
	);

	$extraSlots = $revisionInfo['content'] ?? [];
	foreach ( $extraSlots as $slotInfo ) {
		if ( !isset( $slotInfo['role'] ) ) {
			throw new MWException( "Missing role for imported slot." );
		}

		$revision->setContent(
			$slotInfo['role'],
			$this->makeContent( $title, $revisionId, $slotInfo )
		);
	}

	// Fall back to the current time when the dump carries no timestamp.
	$revision->setTimestamp( $revisionInfo['timestamp'] ?? wfTimestampNow() );

	if ( isset( $revisionInfo['comment'] ) ) {
		$revision->setComment( $revisionInfo['comment'] );
	}

	// The mere presence of the tag marks the revision as minor.
	if ( isset( $revisionInfo['minor'] ) ) {
		$revision->setMinor( true );
	}

	$contributor = $revisionInfo['contributor'] ?? [];
	if ( isset( $contributor['ip'] ) ) {
		$revision->setUserIP( $contributor['ip'] );
	} elseif ( isset( $contributor['username'] ) ) {
		$revision->setUsername(
			$this->externalUserNames->applyPrefix( $contributor['username'] )
		);
	} else {
		$revision->setUsername( $this->externalUserNames->addPrefix( 'Unknown user' ) );
	}

	if ( isset( $revisionInfo['sha1'] ) ) {
		$revision->setSha1Base36( $revisionInfo['sha1'] );
	}

	$revision->setNoUpdates( $this->mNoUpdates );

	return $this->revisionCallback( $revision );
}
|
|
|
|
|
|
2011-10-26 03:44:47 +00:00
|
|
|
/**
 * Read the child nodes of the current <upload> element, gather them into
 * an upload record and, if upload importing is enabled, pass that record
 * to processUpload().
 *
 * @param array &$pageInfo
 * @return mixed Result of processUpload(), or null when
 *   $this->mImportUploads is falsy
 */
private function handleUpload( &$pageInfo ) {
	$this->debug( "Enter upload handler" );

	// Child tags whose node contents are stored as-is under the tag name.
	$simpleTags = [
		'timestamp', 'comment', 'filename', 'text',
		'src', 'size', 'sha1base36', 'archivename', 'rel',
	];
	$uploadInfo = [];

	// Once an unhandled tag has been seen, advance with next() so that its
	// subtree is stepped over instead of being descended into.
	$skipSubtree = false;

	while ( $skipSubtree ? $this->reader->next() : $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $tag === 'upload' && $this->reader->nodeType === XMLReader::END_ELEMENT ) {
			break;
		}

		// A hook handler returning false means this node needs no further
		// processing here.
		if ( !$this->hookRunner->onImportHandleUploadXMLTag( $this, $pageInfo ) ) {
			continue;
		}

		if ( in_array( $tag, $simpleTags, true ) ) {
			$uploadInfo[$tag] = $this->nodeContents();
			continue;
		}

		switch ( $tag ) {
			case 'contributor':
				$uploadInfo['contributor'] = $this->handleContributor();
				break;

			case 'contents':
				// Inline file data; only base64-encoded data is used. It is
				// written to a temporary file that serves as the file source.
				$rawContents = $this->nodeContents();
				if ( $this->reader->getAttribute( 'encoding' ) === 'base64' ) {
					$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $rawContents ) );
					$uploadInfo['isTempSrc'] = true;
				}
				break;

			case '#text':
				break;

			default:
				$this->warn( "Unhandled upload XML tag $tag" );
				$skipSubtree = true;
		}
	}

	// A file that exists under the configured image base path takes
	// precedence over any inline contents found above.
	if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
		$localPath = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
		if ( file_exists( $localPath ) ) {
			$uploadInfo['fileSrc'] = $localPath;
			$uploadInfo['isTempSrc'] = false;
		}
	}

	if ( !$this->mImportUploads ) {
		return null;
	}

	return $this->processUpload( $pageInfo, $uploadInfo );
}
|
2011-10-26 03:44:47 +00:00
|
|
|
|
|
|
|
|
/**
 * Write the given data to a newly created temporary file.
 *
 * The file is created in the directory returned by wfTempDir() with the
 * name prefix "importupload".
 *
 * @param string $contents Data to write
 * @return string Name of the temporary file, as returned by tempnam()
 */
private function dumpTemp( $contents ) {
	$tempDir = wfTempDir();
	$tempFile = tempnam( $tempDir, 'importupload' );

	file_put_contents( $tempFile, $contents );

	return $tempFile;
}
|
|
|
|
|
|
2011-10-26 03:44:47 +00:00
|
|
|
/**
 * Turn a parsed upload record into a WikiRevision and hand it to the
 * registered upload callback.
 *
 * @param array $pageInfo Must contain 'id' and the target title under
 *   '_title'
 * @param array $uploadInfo
 * @throws MWException If makeContent() rejects the description text
 * @return mixed Whatever the upload callback returns
 */
private function processUpload( $pageInfo, $uploadInfo ) {
	$revision = new WikiRevision( $this->config );
	$revId = $pageInfo['id'];
	$title = $pageInfo['_title'];

	// An <upload> element is not required to carry a <text> child, but
	// makeContent() throws when 'text' is unset. Default to an empty text so
	// that such uploads do not abort the import.
	$uploadInfo['text'] = $uploadInfo['text'] ?? '';
	$content = $this->makeContent( $title, $revId, $uploadInfo );

	$revision->setTitle( $title );
	$revision->setID( $revId );
	$revision->setTimestamp( $uploadInfo['timestamp'] );
	$revision->setContent( SlotRecord::MAIN, $content );
	$revision->setFilename( $uploadInfo['filename'] );
	if ( isset( $uploadInfo['archivename'] ) ) {
		$revision->setArchiveName( $uploadInfo['archivename'] );
	}
	$revision->setSrc( $uploadInfo['src'] );
	if ( isset( $uploadInfo['fileSrc'] ) ) {
		// The second argument tells the revision whether the source is a
		// temporary file (set by handleUpload() for inline contents).
		$revision->setFileSrc( $uploadInfo['fileSrc'],
			!empty( $uploadInfo['isTempSrc'] )
		);
	}
	if ( isset( $uploadInfo['sha1base36'] ) ) {
		$revision->setSha1Base36( $uploadInfo['sha1base36'] );
	}
	$revision->setSize( intval( $uploadInfo['size'] ) );
	$revision->setComment( $uploadInfo['comment'] );

	if ( isset( $uploadInfo['contributor']['ip'] ) ) {
		$revision->setUserIP( $uploadInfo['contributor']['ip'] );
	}
	if ( isset( $uploadInfo['contributor']['username'] ) ) {
		$revision->setUsername(
			$this->externalUserNames->applyPrefix( $uploadInfo['contributor']['username'] )
		);
	}
	$revision->setNoUpdates( $this->mNoUpdates );

	return call_user_func( $this->mUploadCallback, $revision );
}
|
|
|
|
|
|
2011-10-26 03:44:47 +00:00
|
|
|
/**
 * Read the child nodes of the current <contributor> element.
 *
 * @return array Contents of the 'id', 'ip' and 'username' children that
 *   were present; empty for a self-closing element
 */
private function handleContributor() {
	$this->debug( "Enter contributor handler." );

	// A self-closing element has no children and no closing tag to wait for.
	if ( $this->reader->isEmptyElement ) {
		return [];
	}

	$contributor = [];

	while ( $this->reader->read() ) {
		$tag = $this->reader->localName;

		if ( $tag === 'contributor' && $this->reader->nodeType === XMLReader::END_ELEMENT ) {
			break;
		}

		// Any other node is ignored.
		switch ( $tag ) {
			case 'id':
			case 'ip':
			case 'username':
				$contributor[$tag] = $this->nodeContents();
				break;
		}
	}

	return $contributor;
}
|
|
|
|
|
|
2011-09-06 00:01:06 +00:00
|
|
|
/**
 * Map a title from the dump onto a local title and check that a page may
 * be imported there.
 *
 * A notice is emitted and false is returned when no local title can be
 * created, when the title is external, when it cannot exist, or (outside
 * command line mode) when the current request's user may not edit it.
 *
 * @param string $text Title text from the dump
 * @param string|null $ns Namespace from the dump; passed on as an integer
 * @return array|false [ local title, foreign title ], or false if the page
 *   is to be skipped
 */
private function processTitle( $text, $ns = null ) {
	$foreignTitleFactory = $this->foreignNamespaces === null
		? new NaiveForeignTitleFactory( $this->contentLanguage )
		: new NamespaceAwareForeignTitleFactory( $this->foreignNamespaces );

	$foreignTitle = $foreignTitleFactory->createForeignTitle( $text, intval( $ns ) );
	$title = $this->importTitleFactory->createTitleFromForeignTitle( $foreignTitle );

	$commandLineMode = $this->config->get( 'CommandLineMode' );

	if ( $title === null ) {
		# Invalid page title? Ignore the page
		$this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
		return false;
	}

	if ( $title->isExternal() ) {
		$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
		return false;
	}

	if ( !$title->canExist() ) {
		$this->notice( 'import-error-special', $title->getPrefixedText() );
		return false;
	}

	if ( !$commandLineMode ) {
		$user = RequestContext::getMain()->getUser();

		if ( !$this->permissionManager->userCan( 'edit', $user, $title ) ) {
			# Do not import if the importing wiki user cannot edit this page
			$this->notice( 'import-error-edit', $title->getPrefixedText() );
			return false;
		}
	}

	return [ $title, $foreignTitle ];
}
|
2020-04-06 09:16:17 +00:00
|
|
|
|
|
|
|
|
/**
 * Look up the content handler for a content model via the injected
 * content handler factory.
 *
 * @param string $model Content model ID
 * @return ContentHandler
 */
private function getContentHandler( $model ) {
	$factory = $this->contentHandlerFactory;

	return $factory->getContentHandler( $model );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Ask the role handler registered for a slot role which content model it
 * uses by default for the given title.
 *
 * @param Title $title
 * @param string $role Slot role name
 *
 * @return string Content model ID
 */
private function getDefaultContentModel( $title, $role ) {
	$roleHandler = $this->slotRoleRegistry->getRoleHandler( $role );

	return $roleHandler->getDefaultModel( $title );
}
|
2011-01-31 07:07:43 +00:00
|
|
|
}
|