Implement SiteListFileCache and rebuild script

Provides a file-based cache of the SiteStore data,
using a static JSON file dump of the data from the
SiteSQLStore.

Includes a maintenance script to rebuild the sites cache.

Bug: 56602
Bug: 45532
Change-Id: Iaee4c1f9fb5d54efe01975f733ebd5c339ac106f
This commit is contained in:
aude 2014-11-20 19:24:39 -05:00 committed by Tim Starling
parent a9d435fb43
commit 90f6efc360
7 changed files with 550 additions and 0 deletions

View file

@ -917,6 +917,7 @@ $wgAutoloadLocalClasses = array(
'RebuildLocalisationCache' => __DIR__ . '/maintenance/rebuildLocalisationCache.php',
'RebuildMessages' => __DIR__ . '/maintenance/rebuildmessages.php',
'RebuildRecentchanges' => __DIR__ . '/maintenance/rebuildrecentchanges.php',
'RebuildSitesCache' => __DIR__ . '/maintenance/rebuildSitesCache.php',
'RebuildTextIndex' => __DIR__ . '/maintenance/rebuildtextindex.php',
'RecentChange' => __DIR__ . '/includes/changes/RecentChange.php',
'RecompressTracked' => __DIR__ . '/maintenance/storage/recompressTracked.php',
@ -1023,6 +1024,8 @@ $wgAutoloadLocalClasses = array(
'SiteArray' => __DIR__ . '/includes/site/SiteList.php',
'SiteConfiguration' => __DIR__ . '/includes/SiteConfiguration.php',
'SiteList' => __DIR__ . '/includes/site/SiteList.php',
'SiteListFileCache' => __DIR__ . '/includes/site/SiteListFileCache.php',
'SiteListFileCacheBuilder' => __DIR__ . '/includes/site/SiteListFileCacheBuilder.php',
'SiteObject' => __DIR__ . '/includes/site/Site.php',
'SiteSQLStore' => __DIR__ . '/includes/site/SiteSQLStore.php',
'SiteStats' => __DIR__ . '/includes/SiteStats.php',

View file

@ -3754,6 +3754,18 @@ $wgInterwikiFallbackSite = 'wiki';
/** @} */ # end of Interwiki caching settings.
/**
* @name SiteStore caching settings.
* @{
*/
/**
 * Specify the file location for the SiteStore json cache file.
 *
 * Defaults to false. maintenance/rebuildSitesCache.php treats false as
 * "no cache file configured" and reports an error unless a target is
 * supplied through its --file option.
 */
$wgSitesCacheFile = false;
/** @} */ # end of SiteStore caching settings.
/**
* If local interwikis are set up which allow redirects,
* set this regexp to restrict URLs which will be displayed

View file

@ -0,0 +1,126 @@
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @since 1.25
*
* @file
*
* @license GNU GPL v2+
*/
class SiteListFileCache {

	/**
	 * Sites built from the cache file; null until getSites() has loaded them.
	 *
	 * @var SiteList|null
	 */
	private $sites = null;

	/**
	 * Path of the json cache file to read.
	 *
	 * @var string
	 */
	private $cacheFile;

	/**
	 * @param string $cacheFile Path of the json cache file to read.
	 */
	public function __construct( $cacheFile ) {
		$this->cacheFile = $cacheFile;
	}

	/**
	 * Returns all sites found in the cache file.
	 *
	 * The file is read on the first call only; later calls return the
	 * SiteList that was built then.
	 *
	 * @since 1.25
	 *
	 * @throws MWException if the cache file cannot be read or holds invalid data
	 * @return SiteList
	 */
	public function getSites() {
		if ( $this->sites === null ) {
			$this->sites = $this->loadSitesFromCache();
		}

		return $this->sites;
	}

	/**
	 * Returns the cached site with the given global id.
	 *
	 * @since 1.25
	 *
	 * @param string $globalId
	 *
	 * @throws MWException if the cache file cannot be read or holds invalid data
	 * @return Site|null The site, or null if the cache has no site with that id
	 */
	public function getSite( $globalId ) {
		$sites = $this->getSites();

		return $sites->hasSite( $globalId ) ? $sites->getSite( $globalId ) : null;
	}

	/**
	 * Reads the cache file and turns every entry into a Site object.
	 *
	 * @throws MWException if the cache file cannot be read or holds invalid data
	 * @return SiteList
	 */
	private function loadSitesFromCache() {
		$data = $this->loadJsonFile();

		$sites = new SiteList();

		// @todo lazy initialize the site objects in the site list (e.g. only when needed to access)
		foreach ( $data['sites'] as $siteArray ) {
			// newSiteFromArray() is type-hinted with array; reject anything else
			// with the documented exception instead of a PHP error.
			if ( !is_array( $siteArray ) ) {
				throw new MWException( 'SiteStore json cache data is invalid.' );
			}

			$sites[] = $this->newSiteFromArray( $siteArray );
		}

		return $sites;
	}

	/**
	 * Reads and decodes the json cache file.
	 *
	 * @throws MWException if the file is not readable, or if the decoded data
	 *  is not an array with a 'sites' array in it
	 * @return array Decoded cache data; $data['sites'] is guaranteed to be an array
	 */
	private function loadJsonFile() {
		if ( !is_readable( $this->cacheFile ) ) {
			throw new MWException( 'SiteList cache file not found.' );
		}

		$contents = file_get_contents( $this->cacheFile );

		// The file may still fail to load after the is_readable() check above.
		if ( $contents === false ) {
			throw new MWException( 'SiteList cache file could not be read.' );
		}

		$data = json_decode( $contents, true );

		// json_decode() gives null for malformed json; 'sites' must be an array
		// because the caller iterates over it.
		if ( !is_array( $data ) || !isset( $data['sites'] ) || !is_array( $data['sites'] ) ) {
			throw new MWException( 'SiteStore json cache data is invalid.' );
		}

		return $data;
	}

	/**
	 * Builds a Site object from one entry of the cache file.
	 *
	 * @param array $data Site entry; 'type' is optional and falls back to
	 *  Site::TYPE_UNKNOWN, all other keys read below are expected to be present.
	 *
	 * @return Site
	 */
	private function newSiteFromArray( array $data ) {
		$siteType = array_key_exists( 'type', $data ) ? $data['type'] : Site::TYPE_UNKNOWN;
		$site = Site::newForType( $siteType );

		$site->setGlobalId( $data['globalid'] );
		$site->setInternalId( $data['internalid'] );
		$site->setForward( $data['forward'] );
		$site->setGroup( $data['group'] );
		$site->setLanguageCode( $data['language'] );
		$site->setSource( $data['source'] );
		$site->setExtraData( $data['data'] );
		$site->setExtraConfig( $data['config'] );

		// Each identifier is a pair of local id type and local id key.
		foreach ( $data['identifiers'] as $identifier ) {
			$site->addLocalId( $identifier['type'], $identifier['key'] );
		}

		return $site;
	}

}

View file

@ -0,0 +1,113 @@
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @since 1.25
*
* @file
*
* @license GNU GPL v2+
*/
class SiteListFileCacheBuilder {

	/**
	 * Store the sites are read from.
	 *
	 * @var SiteStore
	 */
	private $siteStore;

	/**
	 * Path of the json cache file to write.
	 *
	 * @var string
	 */
	private $cacheFile;

	/**
	 * @param SiteStore $siteStore Store the sites are read from.
	 * @param string $cacheFile Path of the json cache file to write.
	 */
	public function __construct( SiteStore $siteStore, $cacheFile ) {
		$this->siteStore = $siteStore;
		$this->cacheFile = $cacheFile;
	}

	/**
	 * Fetches the sites from the site store (requesting 'recache') and dumps
	 * them as json into the cache file.
	 *
	 * @return bool True if the cache file was written, false otherwise
	 */
	public function build() {
		// Kept in a local variable: the class declares no $sites property.
		$sites = $this->siteStore->getSites( 'recache' );

		return $this->cacheSites( $sites->getArrayCopy() );
	}

	/**
	 * Encodes the given sites as json, keyed by global id, and writes the
	 * result to the cache file.
	 *
	 * @param Site[] $sites
	 *
	 * @return bool True if the cache file was written, false otherwise
	 */
	private function cacheSites( array $sites ) {
		$sitesArray = array();

		foreach ( $sites as $site ) {
			$sitesArray[$site->getGlobalId()] = $this->getSiteAsArray( $site );
		}

		$json = json_encode( array(
			'sites' => $sitesArray
		) );

		// json_encode() returns false on failure; writing that would replace
		// an existing cache with an empty file, so leave the file untouched.
		if ( $json === false ) {
			return false;
		}

		return file_put_contents( $this->cacheFile, $json ) !== false;
	}

	/**
	 * Converts a site into the array stored in the cache file: the
	 * unserialized output of Site::serialize(), plus an 'identifiers' list.
	 *
	 * @param Site $site
	 *
	 * @return array
	 */
	private function getSiteAsArray( Site $site ) {
		$siteEntry = unserialize( $site->serialize() );
		$siteEntry['identifiers'] = $this->buildLocalIdentifiers( $site );

		return $siteEntry;
	}

	/**
	 * Flattens the local ids of a site into a list of type/key pairs.
	 *
	 * @param Site $site
	 *
	 * @return array[] Site local identifiers, each with the keys 'type' and 'key'
	 */
	private function buildLocalIdentifiers( Site $site ) {
		$localIds = array();

		foreach ( $site->getLocalIds() as $idType => $ids ) {
			foreach ( $ids as $id ) {
				$localIds[] = array(
					'type' => $idType,
					'key' => $id
				);
			}
		}

		return $localIds;
	}

}

View file

@ -0,0 +1,68 @@
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Maintenance
*/
require_once __DIR__ . '/Maintenance.php';
/**
* Maintenance script to dump the SiteStore as a static json file.
*
* @ingroup Maintenance
*/
class RebuildSitesCache extends Maintenance {

	/**
	 * Sets the script description and registers the optional --file option,
	 * which takes the path the json should be written to.
	 */
	public function __construct() {
		parent::__construct();

		$this->mDescription = "Dumps site store as json";
		$this->addOption( 'file', 'File to output the json to', false, true );
	}

	/**
	 * Builds the sites cache file from the SiteSQLStore.
	 */
	public function execute() {
		$siteStore = SiteSQLStore::newInstance();
		$targetFile = $this->getCacheFile();

		$builder = new SiteListFileCacheBuilder( $siteStore, $targetFile );
		$builder->build();
	}

	/**
	 * Determines the cache file to write: the --file option when given,
	 * otherwise the SitesCacheFile configuration setting.
	 *
	 * @return string
	 */
	private function getCacheFile() {
		if ( $this->hasOption( 'file' ) ) {
			return $this->getOption( 'file' );
		}

		$configuredFile = $this->getConfig()->get( 'SitesCacheFile' );

		if ( $configuredFile === false ) {
			// NOTE(review): the second argument presumably makes error() abort
			// the script — confirm against Maintenance::error().
			$this->error( 'Error: No sites cache file is set in configuration.', 1 );
		}

		return $configuredFile;
	}

}
// Name of the Maintenance subclass defined above.
// NOTE(review): presumably read by the file RUN_MAINTENANCE_IF_MAIN points to;
// that constant is not defined in this file (likely Maintenance.php) — confirm.
$maintClass = "RebuildSitesCache";
require_once RUN_MAINTENANCE_IF_MAIN;

View file

@ -0,0 +1,130 @@
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @since 1.25
*
* @ingroup Site
* @ingroup Test
*
* @covers SiteListFileCacheBuilder
* @group Site
*
* @licence GNU GPL v2+
* @author Katie Filbert < aude.wiki@gmail.com >
*/
class SiteListFileCacheBuilderTest extends PHPUnit_Framework_TestCase {

	/**
	 * Temporary cache files created by the current test; removed in tearDown().
	 *
	 * @var string[]
	 */
	private $cacheFiles = array();

	/**
	 * Deletes the temporary cache files the test created, so that test runs
	 * do not pile up files in the system temp directory.
	 */
	protected function tearDown() {
		foreach ( $this->cacheFiles as $cacheFile ) {
			if ( file_exists( $cacheFile ) ) {
				unlink( $cacheFile );
			}
		}

		$this->cacheFiles = array();

		parent::tearDown();
	}

	/**
	 * build() must write the json encoding of the expected data to the cache file.
	 */
	public function testBuild() {
		$cacheFile = $this->getCacheFile();

		$cacheBuilder = $this->newSiteListFileCacheBuilder( $this->getSites(), $cacheFile );
		$cacheBuilder->build();

		$contents = file_get_contents( $cacheFile );
		$this->assertEquals( json_encode( $this->getExpectedData() ), $contents );
	}

	/**
	 * @return array Data expected in the cache file for the sites from getSites()
	 */
	private function getExpectedData() {
		return array(
			'sites' => array(
				'foobar' => array(
					'globalid' => 'foobar',
					'type' => 'unknown',
					'group' => 'none',
					'source' => 'local',
					'language' => null,
					'localids' => array(),
					'config' => array(),
					'data' => array(),
					'forward' => false,
					'internalid' => null,
					'identifiers' => array()
				),
				'enwiktionary' => array(
					'globalid' => 'enwiktionary',
					'type' => 'mediawiki',
					'group' => 'wiktionary',
					'source' => 'local',
					'language' => 'en',
					'localids' => array(
						'equivalent' => array( 'enwiktionary' )
					),
					'config' => array(),
					'data' => array(
						'paths' => array(
							'page_path' => 'https://en.wiktionary.org/wiki/$1',
							'file_path' => 'https://en.wiktionary.org/w/$1'
						)
					),
					'forward' => false,
					'internalid' => null,
					'identifiers' => array(
						array(
							'type' => 'equivalent',
							'key' => 'enwiktionary'
						)
					)
				)
			)
		);
	}

	/**
	 * @param SiteList $sites Sites the mocked store returns
	 * @param string $cacheFile Path of the cache file to write
	 *
	 * @return SiteListFileCacheBuilder
	 */
	private function newSiteListFileCacheBuilder( SiteList $sites, $cacheFile ) {
		return new SiteListFileCacheBuilder(
			$this->getSiteSQLStore( $sites ),
			$cacheFile
		);
	}

	/**
	 * @param SiteList $sites
	 *
	 * @return SiteSQLStore Mock whose getSites() returns the given sites
	 */
	private function getSiteSQLStore( SiteList $sites ) {
		$siteSQLStore = $this->getMockBuilder( 'SiteSQLStore' )
			->disableOriginalConstructor()
			->getMock();

		$siteSQLStore->expects( $this->any() )
			->method( 'getSites' )
			->will( $this->returnValue( $sites ) );

		return $siteSQLStore;
	}

	/**
	 * @return SiteList A plain site and a MediaWiki site used as test fixtures
	 */
	private function getSites() {
		$sites = array();

		$site = new Site();
		$site->setGlobalId( 'foobar' );
		$sites[] = $site;

		$site = new MediaWikiSite();
		$site->setGlobalId( 'enwiktionary' );
		$site->setGroup( 'wiktionary' );
		$site->setLanguageCode( 'en' );
		$site->addNavigationId( 'enwiktionary' );
		$site->setPath( MediaWikiSite::PATH_PAGE, 'https://en.wiktionary.org/wiki/$1' );
		$site->setPath( MediaWikiSite::PATH_FILE, 'https://en.wiktionary.org/w/$1' );
		$sites[] = $site;

		return new SiteList( $sites );
	}

	/**
	 * Creates a uniquely named temporary file and registers it for cleanup.
	 *
	 * @return string Path of the temporary cache file
	 */
	private function getCacheFile() {
		// tempnam() picks a unique name, unlike a name derived from time(),
		// which collides for runs within the same second.
		$cacheFile = tempnam( sys_get_temp_dir(), 'sites-' );
		$this->cacheFiles[] = $cacheFile;

		return $cacheFile;
	}

}

View file

@ -0,0 +1,98 @@
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @since 1.25
*
* @ingroup Site
* @ingroup Test
*
* @covers SiteListFileCache
* @group Site
*
* @licence GNU GPL v2+
* @author Katie Filbert < aude.wiki@gmail.com >
*/
class SiteListFileCacheTest extends PHPUnit_Framework_TestCase {

	/**
	 * Temporary cache files created by the current test; removed in tearDown().
	 *
	 * @var string[]
	 */
	private $cacheFiles = array();

	/**
	 * Deletes the temporary cache files the test created, so that test runs
	 * do not pile up files in the system temp directory.
	 */
	protected function tearDown() {
		foreach ( $this->cacheFiles as $cacheFile ) {
			if ( file_exists( $cacheFile ) ) {
				unlink( $cacheFile );
			}
		}

		$this->cacheFiles = array();

		parent::tearDown();
	}

	/**
	 * Sites read back from a freshly built cache file must equal the original sites.
	 */
	public function testGetSites() {
		$cacheFile = $this->getCacheFile();
		$sites = $this->getSites();

		$cacheBuilder = $this->newSiteListFileCacheBuilder( $sites, $cacheFile );
		$cacheBuilder->build();

		$cache = new SiteListFileCache( $cacheFile );
		$this->assertEquals( $sites, $cache->getSites() );
	}

	/**
	 * A single site read back from the cache file must equal the original site.
	 */
	public function testGetSite() {
		$cacheFile = $this->getCacheFile();
		$sites = $this->getSites();

		$cacheBuilder = $this->newSiteListFileCacheBuilder( $sites, $cacheFile );
		$cacheBuilder->build();

		$cache = new SiteListFileCache( $cacheFile );
		$this->assertEquals( $sites->getSite( 'enwiktionary' ), $cache->getSite( 'enwiktionary' ) );
	}

	/**
	 * @param SiteList $sites Sites the mocked store returns
	 * @param string $cacheFile Path of the cache file to write
	 *
	 * @return SiteListFileCacheBuilder
	 */
	private function newSiteListFileCacheBuilder( SiteList $sites, $cacheFile ) {
		return new SiteListFileCacheBuilder(
			$this->getSiteSQLStore( $sites ),
			$cacheFile
		);
	}

	/**
	 * @param SiteList $sites
	 *
	 * @return SiteSQLStore Mock whose getSites() returns the given sites
	 */
	private function getSiteSQLStore( SiteList $sites ) {
		$siteSQLStore = $this->getMockBuilder( 'SiteSQLStore' )
			->disableOriginalConstructor()
			->getMock();

		$siteSQLStore->expects( $this->any() )
			->method( 'getSites' )
			->will( $this->returnValue( $sites ) );

		return $siteSQLStore;
	}

	/**
	 * @return SiteList A plain site and a MediaWiki site used as test fixtures
	 */
	private function getSites() {
		$sites = array();

		$site = new Site();
		$site->setGlobalId( 'foobar' );
		$sites[] = $site;

		$site = new MediaWikiSite();
		$site->setGlobalId( 'enwiktionary' );
		$site->setGroup( 'wiktionary' );
		$site->setLanguageCode( 'en' );
		$site->addNavigationId( 'enwiktionary' );
		$site->setPath( MediaWikiSite::PATH_PAGE, 'https://en.wiktionary.org/wiki/$1' );
		$site->setPath( MediaWikiSite::PATH_FILE, 'https://en.wiktionary.org/w/$1' );
		$sites[] = $site;

		return new SiteList( $sites );
	}

	/**
	 * Creates a uniquely named temporary file and registers it for cleanup.
	 *
	 * @return string Path of the temporary cache file
	 */
	private function getCacheFile() {
		// tempnam() picks a unique name, unlike a name derived from time(),
		// which collides for runs within the same second.
		$cacheFile = tempnam( sys_get_temp_dir(), 'sites-' );
		$this->cacheFiles[] = $cacheFile;

		return $cacheFile;
	}

}