Add SPARQL client to core
This will be used for the deep category search implementation; the Wikibase client in repo/maintenance will also be changed to use the same codebase. Bug: T185127 Change-Id: Ie8dd4a5aff55d90f02426f1430ed5214c7327bbc
This commit is contained in:
parent
5612400c82
commit
7b27f210ba
4 changed files with 440 additions and 0 deletions
|
|
@ -938,6 +938,8 @@ $wgAutoloadLocalClasses = [
|
|||
'MediaWiki\\Shell\\Result' => __DIR__ . '/includes/shell/Result.php',
|
||||
'MediaWiki\\Shell\\Shell' => __DIR__ . '/includes/shell/Shell.php',
|
||||
'MediaWiki\\Site\\MediaWikiPageNameNormalizer' => __DIR__ . '/includes/site/MediaWikiPageNameNormalizer.php',
|
||||
'MediaWiki\\Sparql\\SparqlClient' => __DIR__ . '/includes/sparql/SparqlClient.php',
|
||||
'MediaWiki\\Sparql\\SparqlException' => __DIR__ . '/includes/sparql/SparqlException.php',
|
||||
'MediaWiki\\Storage\\BlobAccessException' => __DIR__ . '/includes/Storage/BlobAccessException.php',
|
||||
'MediaWiki\\Storage\\BlobStore' => __DIR__ . '/includes/Storage/BlobStore.php',
|
||||
'MediaWiki\\Storage\\BlobStoreFactory' => __DIR__ . '/includes/Storage/BlobStoreFactory.php',
|
||||
|
|
|
|||
220
includes/sparql/SparqlClient.php
Normal file
220
includes/sparql/SparqlClient.php
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
<?php
|
||||
/**
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
*/
|
||||
|
||||
namespace MediaWiki\Sparql;
|
||||
|
||||
use Http;
|
||||
use MediaWiki\Http\HttpRequestFactory;
|
||||
|
||||
/**
|
||||
* Simple SPARQL client
|
||||
*
|
||||
* @author Stas Malyshev
|
||||
*/
|
||||
class SparqlClient {

	/**
	 * Maximum length (as measured by strlen()) of a query that is still
	 * sent via GET. Longer queries are sent in a POST body instead.
	 */
	const MAX_GET_SIZE = 2048;

	/**
	 * User agent for HTTP requests.
	 * @var string
	 */
	private $userAgent;

	/**
	 * Query timeout (seconds)
	 * @var int
	 */
	private $timeout = 30;

	/**
	 * SPARQL endpoint URL
	 * @var string
	 */
	private $endpoint;

	/**
	 * Client options, merged into the options passed to the request factory.
	 * @var array
	 */
	private $options = [];

	/**
	 * Factory used to create the HTTP request for each query.
	 * @var HttpRequestFactory
	 */
	private $requestFactory;

	/**
	 * @param string $url SPARQL Endpoint
	 * @param HttpRequestFactory $requestFactory
	 */
	public function __construct( $url, HttpRequestFactory $requestFactory ) {
		$this->endpoint = $url;
		$this->requestFactory = $requestFactory;
		$this->userAgent = Http::userAgent() . " SparqlClient";
	}

	/**
	 * Set query timeout (in seconds).
	 * Negative values are ignored and the previous timeout is kept.
	 * @param int $timeout
	 * @return $this
	 */
	public function setTimeout( $timeout ) {
		if ( $timeout >= 0 ) {
			$this->timeout = $timeout;
		}
		return $this;
	}

	/**
	 * Set client options.
	 * Replaces any previously set options.
	 * @param array $options
	 * @return $this
	 */
	public function setClientOptions( $options ) {
		$this->options = $options;
		return $this;
	}

	/**
	 * Get current user agent.
	 * @return string
	 */
	public function getUserAgent() {
		return $this->userAgent;
	}

	/**
	 * Set user agent string.
	 *
	 * Note it is not recommended to completely override user agent for
	 * most applications.
	 * @see appendUserAgent() for recommended way of specifying user agent.
	 *
	 * @param string $agent
	 */
	public function setUserAgent( $agent ) {
		$this->userAgent = $agent;
	}

	/**
	 * Append specific string to user agent.
	 *
	 * This is the recommended way of specifying the user agent
	 * for specific applications of the SparqlClient inside MediaWiki
	 * and extension code.
	 *
	 * @param string $agent
	 */
	public function appendUserAgent( $agent ) {
		$this->userAgent .= ' ' . $agent;
	}

	/**
	 * Query SPARQL endpoint
	 *
	 * @param string $sparql query
	 * @param bool $rawData Whether to return only values or full data objects
	 *
	 * @return array List of results, one row per array element
	 *               Each row will contain fields indexed by variable name.
	 * @throws SparqlException If the endpoint is empty, the HTTP request
	 *               fails, or the response is not valid JSON.
	 */
	public function query( $sparql, $rawData = false ) {
		if ( empty( $this->endpoint ) ) {
			throw new SparqlException( 'Endpoint URL can not be empty' );
		}
		$queryData = [ "query" => $sparql, "format" => "json" ];
		$options = array_merge( [ 'method' => 'GET' ], $this->options );

		// A user agent supplied via client options wins over the client's own.
		if ( empty( $options['userAgent'] ) ) {
			$options['userAgent'] = $this->userAgent;
		}

		if ( $this->timeout >= 0 ) {
			// Blazegraph setting, see https://wiki.blazegraph.com/wiki/index.php/REST_API
			$queryData['maxQueryTimeMillis'] = $this->timeout * 1000;
			$options['timeout'] = $this->timeout;
		}

		if ( strlen( $sparql ) > self::MAX_GET_SIZE ) {
			// big requests go to POST
			$options['method'] = 'POST';
			$options['postData'] = 'query=' . urlencode( $sparql );
			unset( $queryData['query'] );
		}

		$url = wfAppendQuery( $this->endpoint, $queryData );
		$request = $this->requestFactory->create( $url, $options, __METHOD__ );

		$status = $request->execute();

		if ( !$status->isOK() ) {
			throw new SparqlException( "HTTP error: {$status->getWikiText()}" );
		}
		$result = $request->getContent();
		\MediaWiki\suppressWarnings();
		$data = json_decode( $result, true );
		\MediaWiki\restoreWarnings();
		if ( $data === null || $data === false ) {
			// Include only the first 1024 bytes of the response in the message.
			throw new SparqlException( "HTTP request failed, response:\n" .
				substr( $result, 0, 1024 ) );
		}

		return $this->extractData( $data, $rawData );
	}

	/**
	 * Extract data from SPARQL response format.
	 * The response must be in format described in:
	 * https://www.w3.org/TR/sparql11-results-json/
	 *
	 * A response without a usable "results.bindings" list yields an empty
	 * result; a response without "head.vars" yields one empty row per binding.
	 *
	 * @param array $data SPARQL result
	 * @param bool $rawData Whether to return only values or full data objects
	 *
	 * @return array List of results, one row per element.
	 */
	private function extractData( $data, $rawData = false ) {
		$result = [];
		if ( !$data || empty( $data['results'] ) ) {
			return $result;
		}

		// Guard against malformed responses so that missing keys do not
		// raise PHP notices or foreach() warnings.
		$vars = [];
		if ( isset( $data['head']['vars'] ) && is_array( $data['head']['vars'] ) ) {
			$vars = $data['head']['vars'];
		}
		$bindings = [];
		if ( isset( $data['results']['bindings'] ) && is_array( $data['results']['bindings'] ) ) {
			$bindings = $data['results']['bindings'];
		}

		foreach ( $bindings as $row ) {
			$resrow = [];
			foreach ( $vars as $var ) {
				if ( !isset( $row[$var] ) ) {
					// Unbound variable in this row
					$resrow[$var] = null;
					continue;
				}
				$resrow[$var] = $rawData ? $row[$var] : $row[$var]['value'];
			}
			$result[] = $resrow;
		}
		return $result;
	}

}
|
||||
30
includes/sparql/SparqlException.php
Normal file
30
includes/sparql/SparqlException.php
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
<?php
|
||||
/**
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
*/
|
||||
|
||||
namespace MediaWiki\Sparql;
|
||||
|
||||
use Exception;
|
||||
|
||||
/**
 * Exception thrown by SparqlClient when a query cannot be completed.
 *
 * Adds no behavior of its own; the distinct type lets callers catch
 * SPARQL failures separately from other exceptions.
 *
 * @author Stas Malyshev
 */
class SparqlException extends Exception {
	// Intentionally empty.
}
|
||||
188
tests/phpunit/includes/sparql/SparqlClientTest.php
Normal file
188
tests/phpunit/includes/sparql/SparqlClientTest.php
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
<?php
|
||||
namespace MediaWiki\Sparql;
|
||||
|
||||
use Http;
|
||||
use MediaWiki\Http\HttpRequestFactory;
|
||||
use MWHttpRequest;
|
||||
use PHPUnit_Framework_TestCase;
|
||||
|
||||
/**
|
||||
* @covers \MediaWiki\Sparql\SparqlClient
|
||||
*/
|
||||
class SparqlClientTest extends PHPUnit_Framework_TestCase {

	/**
	 * Build a request factory mock whose create() always returns $request.
	 *
	 * @param MWHttpRequest $request
	 * @return HttpRequestFactory Mocked factory
	 */
	private function getRequestFactory( $request ) {
		$requestFactory = $this->getMock( HttpRequestFactory::class );
		$requestFactory->method( 'create' )->willReturn( $request );
		return $requestFactory;
	}

	/**
	 * Build a request mock that executes successfully and returns $content.
	 *
	 * @param string $content Response body to return from getContent()
	 * @return MWHttpRequest Mocked request
	 */
	private function getRequestMock( $content ) {
		$request = $this->getMockBuilder( MWHttpRequest::class )->disableOriginalConstructor()->getMock();
		$request->method( 'execute' )->willReturn( \Status::newGood( 200 ) );
		$request->method( 'getContent' )->willReturn( $content );
		return $request;
	}

	/**
	 * A well-formed response is parsed both in values-only and raw mode,
	 * and unbound variables come back as null.
	 */
	public function testQuery() {
		$json = <<<JSON
{
  "head" : {
    "vars" : [ "x", "y", "z" ]
  },
  "results" : {
    "bindings" : [ {
      "x" : {
        "type" : "uri",
        "value" : "http://wikiba.se/ontology#Dump"
      },
      "y" : {
        "type" : "uri",
        "value" : "http://creativecommons.org/ns#license"
      },
      "z" : {
        "type" : "uri",
        "value" : "http://creativecommons.org/publicdomain/zero/1.0/"
      }
    }, {
      "x" : {
        "type" : "uri",
        "value" : "http://wikiba.se/ontology#Dump"
      },
      "z" : {
        "type" : "literal",
        "value" : "0.1.0"
      }
    } ]
  }
}
JSON;

		$request = $this->getRequestMock( $json );
		$client = new SparqlClient( 'http://acme.test/', $this->getRequestFactory( $request ) );

		// values only
		$result = $client->query( "TEST SPARQL" );
		$this->assertCount( 2, $result );
		$this->assertEquals( 'http://wikiba.se/ontology#Dump', $result[0]['x'] );
		$this->assertEquals( 'http://creativecommons.org/ns#license', $result[0]['y'] );
		$this->assertEquals( '0.1.0', $result[1]['z'] );
		$this->assertNull( $result[1]['y'] );
		// raw data format
		$result = $client->query( "TEST SPARQL 2", true );
		$this->assertCount( 2, $result );
		$this->assertEquals( 'uri', $result[0]['x']['type'] );
		$this->assertEquals( 'http://wikiba.se/ontology#Dump', $result[0]['x']['value'] );
		$this->assertEquals( 'literal', $result[1]['z']['type'] );
		$this->assertEquals( '0.1.0', $result[1]['z']['value'] );
		$this->assertNull( $result[1]['y'] );
	}

	/**
	 * A failed HTTP status must surface as a SparqlException.
	 *
	 * @expectedException \MediaWiki\Sparql\SparqlException
	 */
	public function testBadQuery() {
		$request = $this->getMockBuilder( MWHttpRequest::class )->disableOriginalConstructor()->getMock();
		$client = new SparqlClient( 'http://acme.test/', $this->getRequestFactory( $request ) );

		$request->method( 'execute' )->willReturn( \Status::newFatal( "Bad query" ) );
		$client->query( "TEST SPARQL 3" );
	}

	/**
	 * Data sets for testOptions(): query, client options, timeout,
	 * substrings expected in the request URL, and expected request options.
	 *
	 * @return array
	 */
	public function optionsProvider() {
		return [
			'defaults' => [
				'TEST тест SPARQL 4 ',
				null,
				null,
				[
					'http://acme.test/',
					'query=TEST+%D1%82%D0%B5%D1%81%D1%82+SPARQL+4+',
					'format=json',
					'maxQueryTimeMillis=30000',
				],
				[
					'method' => 'GET',
					'userAgent' => Http::userAgent() . " SparqlClient",
					'timeout' => 30
				]
			],
			'big query' => [
				str_repeat( 'ZZ', SparqlClient::MAX_GET_SIZE ),
				null,
				null,
				[
					'format=json',
					'maxQueryTimeMillis=30000',
				],
				[
					'method' => 'POST',
					'postData' => 'query=' . str_repeat( 'ZZ', SparqlClient::MAX_GET_SIZE ),
				]
			],
			'timeout 1s' => [
				'TEST SPARQL 4',
				null,
				1,
				[
					'maxQueryTimeMillis=1000',
				],
				[
					'timeout' => 1
				]
			],
			'more options' => [
				'TEST SPARQL 5',
				[
					'userAgent' => 'My Test',
					'randomOption' => 'duck',
				],
				null,
				[],
				[
					'userAgent' => 'My Test',
					'randomOption' => 'duck',
				]
			],

		];
	}

	/**
	 * Verify the URL and options handed to the request factory.
	 *
	 * @dataProvider optionsProvider
	 * @param string $sparql
	 * @param array|null $options
	 * @param int|null $timeout
	 * @param array $expectedUrl
	 * @param array $expectedOptions
	 */
	public function testOptions( $sparql, $options, $timeout, $expectedUrl, $expectedOptions ) {
		$requestFactory = $this->getMock( HttpRequestFactory::class );
		$client = new SparqlClient( 'http://acme.test/', $requestFactory );

		$request = $this->getRequestMock( '{}' );

		// The assertions run inside create(), where the client's URL and
		// options are observable.
		$requestFactory->method( 'create' )->willReturnCallback(
			function ( $url, $options ) use ( $request, $expectedUrl, $expectedOptions ) {
				foreach ( $expectedUrl as $eurl ) {
					$this->assertContains( $eurl, $url );
				}
				foreach ( $expectedOptions as $ekey => $evalue ) {
					$this->assertArrayHasKey( $ekey, $options );
					// Expected value first, actual second
					$this->assertEquals( $evalue, $options[$ekey] );
				}
				return $request;
			}
		);

		if ( !is_null( $options ) ) {
			$client->setClientOptions( $options );
		}
		if ( !is_null( $timeout ) ) {
			$client->setTimeout( $timeout );
		}

		$client->query( $sparql );
	}

}
|
||||
Loading…
Reference in a new issue