Up until now, the import backend has tried to resolve titles in the XML data using the regular Title class. This is a disastrous idea, as local namespace names often do not match foreign namespace names. There is enough metadata present in XML dumps generated by modern MW versions for the target namespace ID and name to be reliably determined. This metadata is contained in the <siteinfo> and <ns> tags, which (unbelievably enough) were totally ignored by WikiImporter until now. Fallbacks are provided for older XML dump versions which may be missing some or all of this metadata. The ForeignTitle class is introduced. This is intended specifically for the resolution of titles on foreign wikis. In the future, an InterwikiTitle class could be added, which would inherit from ForeignTitle and add members for the interwiki prefix and fragment. Factory classes to generate ForeignTitle objects from string data, and Title objects from ForeignTitle objects, are also added. The 'AfterImportPage' hook has been modified so the second argument is a ForeignTitle object instead of a Title (the documentation was wrong; it was never a string). LiquidThreads, SMW and FacetedSearch all use this hook but none of them use the $origTitle parameter. Bug: T32723 Bug: T42192 Change-Id: Iaa58e1b9fd7287cdf999cef6a6f3bb63cd2a4778
71 lines
2.8 KiB
PHP
71 lines
2.8 KiB
PHP
<?php
|
|
/**
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*
|
|
* @file
|
|
* @license GPL 2+
|
|
*/
|
|
|
|
/**
|
|
* A parser that translates page titles on a foreign wiki into ForeignTitle
|
|
* objects, with no knowledge of the namespace setup on the foreign site.
|
|
*/
|
|
class NaiveForeignTitleFactory implements ForeignTitleFactory {
	/**
	 * Builds a ForeignTitle from a foreign page title and, when available,
	 * the foreign namespace ID (for instance the contents of the <title> and
	 * <ns> elements of an XML dump).
	 *
	 * The title is split at its first ':' only. Whether the text before the
	 * colon is accepted as a namespace name is decided as follows:
	 *
	 * - $ns is null (e.g. a dump without <ns> data, such as export schema
	 *   0.5 and earlier, or a hand-made dump): the prefix is looked up
	 *   among the LOCAL wiki's namespace names via $wgContLang. This
	 *   mirrors the old pre-1.25 importer behaviour and is only a best
	 *   guess.
	 * - $ns is given: any value that is not loosely equal to 0 means the
	 *   prefix is taken to be a namespace name; a value loosely equal to 0
	 *   means the whole string is the page name.
	 *
	 * If the title contains no colon, or the prefix is not accepted, the
	 * namespace name is the empty string and the page name is the full,
	 * unmodified title.
	 *
	 * @param string $title The page title
	 * @param int|null $ns The namespace ID, or null if this data is not available
	 * @return ForeignTitle
	 */
	public function createForeignTitle( $title, $ns = null ) {
		global $wgContLang;

		// Split on the first colon only; later colons stay in the page name.
		$parts = explode( ':', $title, 2 );

		if ( $ns === null ) {
			// No namespace ID supplied: fall back to the local wiki's
			// namespace names. The prefix is accepted whenever the lookup
			// yields anything other than false.
			$prefixIsNamespace = $wgContLang->getNsIndex( $parts[0] ) !== false;
		} else {
			// Loose comparison on purpose kept as-is, so that a numeric
			// string such as '0' is handled like the integer 0.
			$prefixIsNamespace = ( $ns != 0 );
		}

		if ( !$prefixIsNamespace || count( $parts ) !== 2 ) {
			// Either there is no colon, or the prefix is not a namespace:
			// the whole title is the page name.
			return new ForeignTitle( $ns, '', $title );
		}

		return new ForeignTitle( $ns, $parts[0], $parts[1] );
	}
}
|