diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 7883de553ca..464fc8a0fb4 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -639,6 +639,13 @@ $wgImgAuthUrlPathMap = []; * - isPrivate Set this if measures should always be taken to keep the files private. * One should not trust this to assure that the files are not web readable; * the server configuration should be done manually depending on the backend. + * - useJsonMetadata Whether handler metadata should be stored in JSON format. Default: false. + * - useSplitMetadata Whether handler metadata should be split up and stored in the text table. + * Default: false. + * - splitMetadataThreshold + * If the media handler opts in, large metadata items will be split into a + * separate blob in the database if the item is larger than this threshold. + * Default: 1000 * * These settings describe a foreign MediaWiki installation. They are optional, and will be ignored * for local repositories: diff --git a/includes/Storage/BlobStore.php b/includes/Storage/BlobStore.php index 74e3cb83c47..4fcffc3d207 100644 --- a/includes/Storage/BlobStore.php +++ b/includes/Storage/BlobStore.php @@ -83,6 +83,11 @@ interface BlobStore { */ public const FORMAT_HINT = 'cont_format'; + /** + * Hint key for an image name. + */ + public const IMAGE_HINT = 'img_name'; + /** * Retrieve a blob, given an address. 
* diff --git a/includes/filerepo/ForeignDBRepo.php b/includes/filerepo/ForeignDBRepo.php index daff0d1dc76..17fb75fd4a6 100644 --- a/includes/filerepo/ForeignDBRepo.php +++ b/includes/filerepo/ForeignDBRepo.php @@ -21,6 +21,7 @@ * @ingroup FileRepo */ +use MediaWiki\Storage\BlobStore; use Wikimedia\Rdbms\Database; use Wikimedia\Rdbms\DatabaseDomain; use Wikimedia\Rdbms\IDatabase; @@ -125,4 +126,8 @@ class ForeignDBRepo extends LocalRepo { public function getInfo() { return FileRepo::getInfo(); } + + public function getBlobStore(): ?BlobStore { + return null; + } } diff --git a/includes/filerepo/LocalRepo.php b/includes/filerepo/LocalRepo.php index 921c7da9b04..4308785dcc9 100644 --- a/includes/filerepo/LocalRepo.php +++ b/includes/filerepo/LocalRepo.php @@ -26,6 +26,7 @@ use MediaWiki\Linker\LinkTarget; use MediaWiki\MediaWikiServices; use MediaWiki\Page\PageIdentity; use MediaWiki\Permissions\Authority; +use MediaWiki\Storage\BlobStore; use Wikimedia\Rdbms\Database; use Wikimedia\Rdbms\IDatabase; use Wikimedia\Rdbms\IResultWrapper; @@ -56,6 +57,18 @@ class LocalRepo extends FileRepo { /** @var bool Whether shared cache keys are exposed/accessible */ protected $hasAccessibleSharedCache; + /** @var BlobStore */ + protected $blobStore; + + /** @var bool */ + protected $useJsonMetadata = false; + + /** @var bool */ + protected $useSplitMetadata = false; + + /** @var int|null */ + protected $splitMetadataThreshold = 1000; + public function __construct( array $info = null ) { parent::__construct( $info ); @@ -71,6 +84,18 @@ class LocalRepo extends FileRepo { 'dbHandleFactory' => $this->getDBFactory() ] ); } + + foreach ( + [ + 'useJsonMetadata', + 'useSplitMetadata', + 'splitMetadataThreshold' + ] as $option + ) { + if ( isset( $info[$option] ) ) { + $this->$option = $info[$option]; + } + } } /** @@ -608,4 +633,51 @@ class LocalRepo extends FileRepo { return parent::$function( ...$args ); } } + + /** + * Returns true if files should store metadata in JSON format. 
This
+	 * requires metadata from all handlers to be JSON-serializable.
+	 *
+	 * To avoid breaking existing metadata, reading JSON metadata is always
+	 * enabled regardless of this setting.
+	 *
+	 * @return bool
+	 */
+	public function isJsonMetadataEnabled() {
+		return $this->useJsonMetadata;
+	}
+
+	/**
+	 * Returns true if files should split up large metadata, storing parts of it
+	 * in the BlobStore. This is never the case unless JSON metadata is enabled.
+	 *
+	 * @return bool
+	 */
+	public function isSplitMetadataEnabled() {
+		return $this->isJsonMetadataEnabled() && $this->useSplitMetadata;
+	}
+
+	/**
+	 * Get the threshold above which metadata items should be split into
+	 * separate storage, or null if no splitting should be done.
+	 *
+	 * @return int|null
+	 */
+	public function getSplitMetadataThreshold() {
+		return $this->splitMetadataThreshold;
+	}
+
+	/**
+	 * Get a BlobStore for storing and retrieving large metadata, or null if
+	 * that can't be done.
+	 *
+	 * @return ?BlobStore
+	 */
+	public function getBlobStore(): ?BlobStore {
+		if ( !$this->blobStore ) {
+			$this->blobStore = MediaWikiServices::getInstance()->getBlobStoreFactory()
+				->newBlobStore( $this->dbDomain );
+		}
+		return $this->blobStore;
+	}
 }
diff --git a/includes/filerepo/file/LocalFile.php b/includes/filerepo/file/LocalFile.php
index 4d1ab7f2f22..111d7a0ad3e 100644
--- a/includes/filerepo/file/LocalFile.php
+++ b/includes/filerepo/file/LocalFile.php
@@ -26,6 +26,7 @@ use MediaWiki\MediaWikiServices;
 use MediaWiki\Permissions\Authority;
 use MediaWiki\Revision\RevisionRecord;
 use MediaWiki\Revision\RevisionStore;
+use MediaWiki\Storage\BlobStore;
 use MediaWiki\User\UserIdentity;
 use MediaWiki\User\UserIdentityValue;
 use Wikimedia\Rdbms\Blob;
@@ -88,6 +89,17 @@ class LocalFile extends File {
 	/** @var array Unserialized metadata */
 	protected $metadataArray = [];
 
+	/** @var string[] Map of metadata item name to blob address */
+	protected $metadataBlobs = [];
+
+	/**
+	 * Map of metadata item name to blob address for items that exist but
+	 * have not yet been
loaded into $this->metadataArray + * + * @var string[] + */ + protected $unloadedMetadataBlobs = []; + /** @var string SHA-1 base 36 content hash */ protected $sha1; @@ -353,7 +365,16 @@ class LocalFile extends File { $cacheVal['user'] = $this->user->getId(); $cacheVal['user_text'] = $this->user->getName(); } - $cacheVal['metadata'] = $this->metadataArray; + + // Don't cache metadata items stored as blobs, since they tend to be large + if ( $this->metadataBlobs ) { + $cacheVal['metadata'] = array_diff_key( + $this->metadataArray, $this->metadataBlobs ); + // Save the blob addresses + $cacheVal['metadataBlobs'] = $this->metadataBlobs; + } else { + $cacheVal['metadata'] = $this->metadataArray; + } // Strip off excessive entries from the subset of fields that can become large. // If the cache value gets to large it will not fit in memcached and nothing will @@ -363,6 +384,9 @@ class LocalFile extends File { && strlen( serialize( $cacheVal[$field] ) ) > 100 * 1024 ) { unset( $cacheVal[$field] ); // don't let the value get too big + if ( $field === 'metadata' ) { + unset( $cacheVal['metadataBlobs'] ); + } } } @@ -834,6 +858,16 @@ class LocalFile extends File { $this->loadMetadataFromString( $info['metadata'] ); } elseif ( is_array( $info['metadata'] ) ) { $this->metadataArray = $info['metadata']; + if ( isset( $info['metadataBlobs'] ) ) { + $this->metadataBlobs = $info['metadataBlobs']; + $this->unloadedMetadataBlobs = array_diff_key( + $this->metadataBlobs, + $this->metadataArray + ); + } else { + $this->metadataBlobs = []; + $this->unloadedMetadataBlobs = []; + } } else { $logger = LoggerFactory::getInstance( 'LocalFile' ); $logger->warning( __METHOD__ . ' given invalid metadata of type ' . 
@@ -983,12 +1017,112 @@ class LocalFile extends File {
 	 */
 	public function getMetadataArray(): array {
 		$this->load( self::LOAD_ALL );
+		if ( $this->unloadedMetadataBlobs ) {
+			// Some items are still held only in the BlobStore; load them so that
+			// the complete array is returned
+			return $this->getMetadataItems(
+				array_unique( array_merge(
+					array_keys( $this->metadataArray ),
+					array_keys( $this->unloadedMetadataBlobs )
+				) )
+			);
+		}
 		return $this->metadataArray;
 	}
 
+	/**
+	 * Get the values of the named metadata items, fetching from the BlobStore
+	 * any of them that have not been loaded yet. Names for which no value can
+	 * be found are omitted from the result.
+	 *
+	 * @param string[] $itemNames
+	 * @return array Map of item name to value
+	 */
+	public function getMetadataItems( array $itemNames ): array {
+		$this->load( self::LOAD_ALL );
+		$result = [];
+		$addresses = [];
+		// Answer from memory where possible and queue the rest by blob address
+		foreach ( $itemNames as $itemName ) {
+			if ( array_key_exists( $itemName, $this->metadataArray ) ) {
+				$result[$itemName] = $this->metadataArray[$itemName];
+			} elseif ( isset( $this->unloadedMetadataBlobs[$itemName] ) ) {
+				$addresses[$itemName] = $this->unloadedMetadataBlobs[$itemName];
+			}
+		}
+		if ( $addresses ) {
+			$blobStore = $this->repo->getBlobStore();
+			if ( !$blobStore ) {
+				LoggerFactory::getInstance( 'LocalFile' )->warning(
+					"Unable to load metadata: repo has no blob store" );
+				return $result;
+			}
+			$status = $blobStore->getBlobBatch( $addresses );
+			if ( !$status->isGood() ) {
+				$msg = Status::wrap( $status )->getWikiText(
+					false, false, 'en' );
+				LoggerFactory::getInstance( 'LocalFile' )->warning(
+					"Error loading metadata from BlobStore: $msg" );
+			}
+			// getValue() does not change inside the loop, so read it once
+			$blobs = $status->getValue();
+			foreach ( $addresses as $itemName => $address ) {
+				// Drop the item from the pending list whether or not it loaded;
+				// a failed item is therefore not requested again by this object
+				unset( $this->unloadedMetadataBlobs[$itemName] );
+				$json = $blobs[$address] ?? null;
+				if ( $json !== null ) {
+					$value = $this->jsonDecode( $json );
+					$result[$itemName] = $value;
+					$this->metadataArray[$itemName] = $value;
+				}
+			}
+		}
+		return $result;
+	}
+
+	/**
+	 * Do JSON encoding with local flags. Throw an exception if the data cannot be
+	 * serialized.
+	 *
+	 * @throws MWException
+	 * @param mixed $data
+	 * @return string
+	 */
+	private function jsonEncode( $data ): string {
+		$s = json_encode( $data,
+			JSON_INVALID_UTF8_IGNORE |
+			JSON_UNESCAPED_SLASHES |
+			JSON_UNESCAPED_UNICODE );
+		if ( $s === false ) {
+			throw new MWException( __METHOD__ . ': metadata is not JSON-serializable ' .
+				'(type = ' . $this->getMimeType() . ')' );
+		}
+		return $s;
+	}
+
+	/**
+	 * Do JSON decoding with local flags.
+	 *
+	 * This doesn't use JsonCodec because JsonCodec can construct objects,
+	 * which we don't want.
+	 *
+	 * Does not throw. Returns null on failure.
+	 *
+	 * @param string $s
+	 * @return mixed The decoded value, or null on failure
+	 */
+	private function jsonDecode( string $s ) {
+		// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
+		return @json_decode( $s, true, 512, JSON_INVALID_UTF8_IGNORE );
+	}
+
 	/**
 	 * Serialize the metadata array for insertion into img_metadata, oi_metadata
-	 * or fa_metadata
+	 * or fa_metadata.
+	 *
+	 * If metadata splitting is enabled, this may write blobs to the database;
+	 * their addresses are then part of the returned value.
 	 *
 	 * @internal
 	 * @param IDatabase $db
@@ -996,10 +1115,12 @@ class LocalFile extends File {
 	 */
 	public function getMetadataForDb( IDatabase $db ) {
 		$this->load( self::LOAD_ALL );
-		if ( !$this->metadataArray ) {
+		if ( !$this->metadataArray && !$this->metadataBlobs ) {
 			$s = '';
+		} elseif ( $this->repo->isJsonMetadataEnabled() ) {
+			$s = $this->getJsonMetadata();
 		} else {
-			$s = serialize( $this->metadataArray );
+			$s = serialize( $this->getMetadataArray() );
 		}
 		if ( !is_string( $s ) ) {
 			throw new MWException( 'Could not serialize image metadata value for DB' );
@@ -1007,6 +1128,66 @@ class LocalFile extends File {
 		return $db->encodeBlob( $s );
 	}
 
+	/**
+	 * Get metadata in JSON format ready for DB insertion, optionally splitting
+	 * items out to BlobStore.
+	 *
+	 * @return string
+	 */
+	private function getJsonMetadata() {
+		// Inline every item that is not already held in the BlobStore
+		$envelope = [
+			'data' => array_diff_key( $this->metadataArray, $this->metadataBlobs )
+		];
+
+		// Items already in the BlobStore are represented by their addresses
+		if ( $this->metadataBlobs ) {
+			$envelope['blobs'] = $this->metadataBlobs;
+		}
+
+		// Encode once up front; this is the result unless something is split out
+		$s = $this->jsonEncode( $envelope );
+
+		// Splitting requires opt-in from the repo and from the media handler
+		if ( !$this->repo->isSplitMetadataEnabled()
+			|| !$this->getHandler()
+			|| !$this->getHandler()->useSplitMetadata()
+		) {
+			return $s;
+		}
+		$threshold = $this->repo->getSplitMetadataThreshold();
+		if ( !$threshold || strlen( $s ) <= $threshold ) {
+			return $s;
+		}
+		$blobStore = $this->repo->getBlobStore();
+		if ( !$blobStore ) {
+			return $s;
+		}
+
+		// The envelope as a whole is above the threshold. Store each item whose
+		// own encoding is above the threshold as a separate blob.
+		$blobAddresses = [];
+		foreach ( $envelope['data'] as $name => $value ) {
+			$encoded = $this->jsonEncode( $value );
+			if ( strlen( $encoded ) > $threshold ) {
+				$blobAddresses[$name] = $blobStore->storeBlob(
+					$encoded,
+					[ BlobStore::IMAGE_HINT => $this->getName() ]
+				);
+			}
+		}
+		if ( !$blobAddresses ) {
+			// No single item was large enough, so the first encoding still stands
+			return $s;
+		}
+		// Remember the new blobs (so that repeated calls do not store them again)
+		$this->metadataBlobs += $blobAddresses;
+		$envelope['data'] = array_diff_key( $envelope['data'], $blobAddresses );
+		// Reference previously stored blobs as well as the ones just written
+		$envelope['blobs'] = $this->metadataBlobs;
+		return $this->jsonEncode( $envelope );
+	}
+
 	/**
 	 * Unserialize a metadata blob which came from the database and store it
 	 * in $this.
@@ -1028,18 +1209,38 @@ class LocalFile extends File {
 	 */
 	protected function loadMetadataFromString( $metadataString ) {
 		$this->extraDataLoaded = true;
+		$this->metadataArray = [];
+		$this->metadataBlobs = [];
+		$this->unloadedMetadataBlobs = [];
 		$metadataString = (string)$metadataString;
 		if ( $metadataString === '' ) {
-			$this->metadataArray = [];
+			return;
+		}
+		if ( $metadataString[0] === '{' ) {
+			// JSON envelope: inline items are under "data", blob addresses under "blobs"
+			$envelope = $this->jsonDecode( $metadataString );
+			if ( !$envelope ) {
+				// Legacy error encoding
+				$this->metadataArray = [ '_error' => $metadataString ];
+			} else {
+				// Only accept array members, so that a malformed row cannot put
+				// a scalar into the array-typed properties
+				if ( is_array( $envelope['data'] ?? null ) ) {
+					$this->metadataArray = $envelope['data'];
+				}
+				if ( is_array( $envelope['blobs'] ?? null ) ) {
+					// Nothing has been fetched yet, so every blob starts out unloaded
+					$this->metadataBlobs = $this->unloadedMetadataBlobs = $envelope['blobs'];
+				}
+			}
 		} else {
 			// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
 			$data = @unserialize( $metadataString );
 			if ( !is_array( $data ) ) {
 				// Legacy error encoding
-				$this->metadataArray = [ '_error' => $metadataString ];
-			} else {
-				$this->metadataArray = $data;
+				$data = [ '_error' => $metadataString ];
 			}
+			$this->metadataArray = $data;
 		}
 	}
 
diff --git a/includes/media/MediaHandler.php b/includes/media/MediaHandler.php
index 6bdc7ad7746..2fe2c02deca 100644
--- a/includes/media/MediaHandler.php
+++ b/includes/media/MediaHandler.php
@@ -1199,4 +1199,16 @@ abstract class MediaHandler {
 	public function getContentHeaders( $metadata ) {
 		return [ 'X-Content-Dimensions' => '' ]; // T175689
 	}
+
+	/**
+	 * If this returns true, LocalFile may split metadata up and store its
+	 * constituent items separately. This only makes sense if the handler calls
+	 * File::getMetadataItem() or File::getMetadataItems() instead of
+	 * requesting the whole array at once.
+ * + * @return bool + */ + public function useSplitMetadata() { + return false; + } } diff --git a/tests/phpunit/includes/filerepo/file/LocalFileTest.php b/tests/phpunit/includes/filerepo/file/LocalFileTest.php index 6bc92206646..311eb272415 100644 --- a/tests/phpunit/includes/filerepo/file/LocalFileTest.php +++ b/tests/phpunit/includes/filerepo/file/LocalFileTest.php @@ -9,6 +9,7 @@ use MediaWiki\MediaWikiServices; use MediaWiki\Permissions\Authority; use MediaWiki\Tests\Unit\Permissions\MockAuthorityTrait; use MediaWiki\User\UserIdentity; +use Wikimedia\TestingAccessWrapper; /** * @group Database @@ -20,6 +21,8 @@ class LocalFileTest extends MediaWikiIntegrationTestCase { parent::setUp(); $this->tablesUsed[] = 'image'; $this->tablesUsed[] = 'oldimage'; + $this->tablesUsed[] = 'page'; + $this->tablesUsed[] = 'text'; } private static function getDefaultInfo() { @@ -511,11 +514,53 @@ class LocalFileTest extends MediaWikiIntegrationTestCase { $this->assertFalse( $file->getDescriptionText() ); } + public function provideLoadFromDBAndCache() { + return [ + 'legacy' => [ + // phpcs:ignore Generic.Files.LineLength + 'a:6:{s:10:"frameCount";i:0;s:9:"loopCount";i:1;s:8:"duration";d:0;s:8:"bitDepth";i:16;s:9:"colorType";s:10:"truecolour";s:8:"metadata";a:2:{s:8:"DateTime";s:19:"2019:07:30 13:52:32";s:15:"_MW_PNG_VERSION";i:1;}}', + [], + false, + ], + 'json' => [ + // phpcs:ignore Generic.Files.LineLength + '{"data":{"frameCount":0,"loopCount":1,"duration":0,"bitDepth":16,"colorType":"truecolour","metadata":{"DateTime":"2019:07:30 13:52:32","_MW_PNG_VERSION":1}}}', + [], + false, + ], + 'json with blobs' => [ + // phpcs:ignore Generic.Files.LineLength + '{"blobs":{"colorType":"__BLOB0__"},"data":{"frameCount":0,"loopCount":1,"duration":0,"bitDepth":16,"metadata":{"DateTime":"2019:07:30 13:52:32","_MW_PNG_VERSION":1}}}', + [ '"truecolour"' ], + false, + ], + 'large (>100KB triggers uncached case)' => [ + // phpcs:ignore Generic.Files.LineLength + '{"data":{"large":"' . 
str_repeat( 'x', 102401 ) . '","frameCount":0,"loopCount":1,"duration":0,"bitDepth":16,"colorType":"truecolour","metadata":{"DateTime":"2019:07:30 13:52:32","_MW_PNG_VERSION":1}}}', + [], + 102401, + ], + 'large json blob' => [ + // phpcs:ignore Generic.Files.LineLength + '{"blobs":{"large":"__BLOB0__"},"data":{"frameCount":0,"loopCount":1,"duration":0,"bitDepth":16,"colorType":"truecolour","metadata":{"DateTime":"2019:07:30 13:52:32","_MW_PNG_VERSION":1}}}', + [ '"' . str_repeat( 'x', 102401 ) . '"' ], + 102401, + ], + ]; + } + /** + * Test loadFromDB() and loadFromCache() and helpers + * + * @dataProvider provideLoadFromDBAndCache * @covers File * @covers LocalFile + * @param string $meta + * @param array $blobs Metadata blob values + * @param int|false $largeItemSize The size of the "large" metadata item, + * or false if there will be no such item. */ - public function testLoadFromDBAndCache() { + public function testLoadFromDBAndCache( $meta, $blobs, $largeItemSize ) { $services = MediaWikiServices::getInstance(); $cache = new HashBagOStuff; @@ -533,8 +578,41 @@ class LocalFileTest extends MediaWikiIntegrationTestCase { $comment = $services->getCommentStore()->createComment( $dbw, 'comment' ); $title = Title::newFromText( 'File:Random-11m.png' ); - // phpcs:ignore Generic.Files.LineLength - $meta = 'a:6:{s:10:"frameCount";i:0;s:9:"loopCount";i:1;s:8:"duration";d:0;s:8:"bitDepth";i:16;s:9:"colorType";s:10:"truecolour";s:8:"metadata";a:2:{s:8:"DateTime";s:19:"2019:07:30 13:52:32";s:15:"_MW_PNG_VERSION";i:1;}}'; + if ( $blobs ) { + $blobStore = $services->getBlobStore(); + foreach ( $blobs as $i => $value ) { + $address = $blobStore->storeBlob( $value ); + $meta = str_replace( "__BLOB{$i}__", $address, $meta ); + } + } + + // The provided metadata strings should all unserialize to this + $expectedMetaArray = [ + 'frameCount' => 0, + 'loopCount' => 1, + 'duration' => 0.0, + 'bitDepth' => 16, + 'colorType' => 'truecolour', + 'metadata' => [ + 'DateTime' => 
'2019:07:30 13:52:32', + '_MW_PNG_VERSION' => 1, + ], + ]; + if ( $largeItemSize ) { + $expectedMetaArray['large'] = str_repeat( 'x', $largeItemSize ); + } + $expectedProps = [ + 'name' => 'Random-11m.png', + 'size' => 10816824, + 'width' => 1000, + 'height' => 1800, + 'metadata' => $expectedMetaArray, + 'bits' => 16, + 'media_type' => 'BITMAP', + 'mime' => 'image/png', + 'timestamp' => '20201105235242', + 'sha1' => 'sy02psim0bgdh0jt4vdltuzoh7j80ru' + ]; $dbw->insert( 'image', @@ -557,50 +635,66 @@ class LocalFileTest extends MediaWikiIntegrationTestCase { $repo = $services->getRepoGroup()->getLocalRepo(); $file = $repo->findFile( $title ); - $this->assertSame( 'Random-11m.png', $file->getName() ); - $this->assertSame( 10816824, $file->getSize() ); - $this->assertSame( 1000, $file->getWidth() ); - $this->assertSame( 1800, $file->getHeight() ); - $this->assertSame( unserialize( $meta ), $file->getMetadataArray() ); + $this->assertFileProperties( $file, $expectedProps ); $this->assertSame( 'truecolour', $file->getMetadataItem( 'colorType' ) ); $this->assertSame( [ 'loopCount' => 1, 'bitDepth' => 16 ], $file->getMetadataItems( [ 'loopCount', 'bitDepth', 'nonexistent' ] ) ); - $this->assertSame( 16, $file->getBitDepth() ); - $this->assertSame( 'BITMAP', $file->getMediaType() ); - $this->assertSame( 'image/png', $file->getMimeType() ); $this->assertSame( 'comment', $file->getDescription() ); $this->assertTrue( $user->equals( $file->getUploader() ) ); - $this->assertSame( '20201105235242', $file->getTimestamp() ); - $this->assertSame( 'sy02psim0bgdh0jt4vdltuzoh7j80ru', $file->getSha1() ); - // Test cache - $dbw->delete( 'image', [ 'img_name' => 'Random-11m.png' ], __METHOD__ ); + // Test cache by corrupting DB + // Don't wipe img_metadata though since that will be loaded by loadExtraFromDB() + $dbw->update( 'image', [ 'img_size' => 0 ], + [ 'img_name' => 'Random-11m.png' ], __METHOD__ ); $file = LocalFile::newFromTitle( $title, $repo ); - $this->assertSame( 
'Random-11m.png', $file->getName() ); - $this->assertSame( 10816824, $file->getSize() ); - $this->assertSame( 1000, $file->getWidth() ); - $this->assertSame( 1800, $file->getHeight() ); - $this->assertSame( unserialize( $meta ), $file->getMetadataArray() ); + $this->assertFileProperties( $file, $expectedProps ); $this->assertSame( 'truecolour', $file->getMetadataItem( 'colorType' ) ); $this->assertSame( [ 'loopCount' => 1, 'bitDepth' => 16 ], $file->getMetadataItems( [ 'loopCount', 'bitDepth', 'nonexistent' ] ) ); - $this->assertSame( 16, $file->getBitDepth() ); - $this->assertSame( 'BITMAP', $file->getMediaType() ); - $this->assertSame( 'image/png', $file->getMimeType() ); $this->assertSame( 'comment', $file->getDescription() ); $this->assertTrue( $user->equals( $file->getUploader() ) ); - $this->assertSame( '20201105235242', $file->getTimestamp() ); - $this->assertSame( 'sy02psim0bgdh0jt4vdltuzoh7j80ru', $file->getSha1() ); // Make sure we were actually hitting the WAN cache - $cache->clear(); - $file = $repo->findFile( $title ); - $this->assertSame( false, $file ); + $dbw->delete( 'image', [ 'img_name' => 'Random-11m.png' ], __METHOD__ ); + $file->invalidateCache(); + $file = LocalFile::newFromTitle( $title, $repo ); + $this->assertSame( false, $file->exists() ); + } + + private function assertFileProperties( $file, $expectedProps ) { + // Compare metadata without ordering + if ( isset( $expectedProps['metadata'] ) ) { + $this->assertArrayEquals( $expectedProps['metadata'], $file->getMetadataArray() ); + } + + // Filter out unsupported expected properties + $expectedProps = array_intersect_key( + $expectedProps, + array_fill_keys( [ + 'name', 'size', 'width', 'height', + 'bits', 'media_type', 'mime', 'timestamp', 'sha1' + ], true ) + ); + + // Compare the other properties + $actualProps = [ + 'name' => $file->getName(), + 'size' => $file->getSize(), + 'width' => $file->getWidth(), + 'height' => $file->getHeight(), + 'bits' => $file->getBitDepth(), + 'media_type' 
=> $file->getMediaType(), + 'mime' => $file->getMimeType(), + 'timestamp' => $file->getTimestamp(), + 'sha1' => $file->getSha1() + ]; + $actualProps = array_intersect_key( $actualProps, $expectedProps ); + $this->assertArrayEquals( $expectedProps, $actualProps, false, true ); } public function provideLegacyMetadataRoundTrip() { @@ -629,4 +723,172 @@ class LocalFileTest extends MediaWikiIntegrationTestCase { }; $this->assertSame( $meta, $file->getMetadata() ); } + + public function provideRecordUpload3() { + $files = [ + 'test.jpg' => [ + 'width' => 20, + 'height' => 20, + 'bits' => 8, + 'metadata' => [ + 'ImageDescription' => 'Test file', + 'XResolution' => '72/1', + 'YResolution' => '72/1', + 'ResolutionUnit' => 2, + 'YCbCrPositioning' => 1, + 'JPEGFileComment' => [ + 'Created with GIMP', + ], + 'MEDIAWIKI_EXIF_VERSION' => 2, + ], + 'fileExists' => true, + 'size' => 437, + 'file-mime' => 'image/jpeg', + 'major_mime' => 'image', + 'minor_mime' => 'jpeg', + 'mime' => 'image/jpeg', + 'sha1' => '620ezvucfyia1mltnavzpqg9gmai2gf', + 'media_type' => 'BITMAP', + ], + 'large-text.pdf' => [ + 'width' => 1275, + 'height' => 1650, + 'fileExists' => true, + 'size' => 10598657, + 'file-mime' => 'application/pdf', + 'major_mime' => 'application', + 'minor_mime' => 'pdf', + 'mime' => 'application/pdf', + 'sha1' => '1o3l1yqjue2diq07grnnyq9kyapfpor', + 'bits' => 0, + 'media_type' => 'OFFICE', + 'metadata' => [ + 'Pages' => '6', + 'text' => [ + 'Page 1 text .................................', + 'Page 2 text .................................', + 'Page 3 text .................................', + 'Page 4 text .................................', + 'Page 5 text .................................', + 'Page 6 text .................................', + ] + ] + ], + 'no-text.pdf' => [ + 'width' => 1275, + 'height' => 1650, + 'fileExists' => true, + 'size' => 10598657, + 'file-mime' => 'application/pdf', + 'major_mime' => 'application', + 'minor_mime' => 'pdf', + 'mime' => 'application/pdf', 
+ 'sha1' => '1o3l1yqjue2diq07grnnyq9kyapfpor', + 'bits' => 0, + 'media_type' => 'OFFICE', + 'metadata' => [ + 'Pages' => '6', + ] + ] + ]; + $configurations = [ + [], + [ 'useJsonMetadata' => true ], + [ + 'useJsonMetadata' => true, + 'useSplitMetadata' => true, + 'splitMetadataThreshold' => 50 + ] + ]; + return ArrayUtils::cartesianProduct( $files, $configurations ); + } + + /** + * Test recordUpload3() and confirm that file properties are reflected back + * after loading the new file from the DB. + * + * @covers LocalFile + * @dataProvider provideRecordUpload3 + * @param array $props File properties + * @param array $conf LocalRepo configuration overrides + */ + public function testRecordUpload3( $props, $conf ) { + $repo = new LocalRepo( + [ + 'class' => LocalRepo::class, + 'name' => 'test', + 'backend' => new FSFileBackend( [ + 'name' => 'test-backend', + 'wikiId' => WikiMap::getCurrentWikiId(), + 'basePath' => '/nonexistent' + ] ) + ] + $conf + ); + $title = Title::newFromText( 'File:Test.jpg' ); + $file = new LocalFile( $title, $repo ); + + if ( $props['mime'] === 'application/pdf' ) { + $mockPdfHandler = new class extends ImageHandler { + public function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) { + } + + public function useSplitMetadata() { + return true; + } + }; + TestingAccessWrapper::newFromObject( $file )->handler = $mockPdfHandler; + } + + $status = $file->recordUpload3( + 'oldver', + 'comment', + 'page text', + $this->getTestSysop()->getUser(), + $props + ); + $this->assertSame( [], $status->getErrors() ); + // Check properties of the same object immediately after upload + $this->assertFileProperties( $file, $props ); + // Check round-trip through the DB + $file = new LocalFile( $title, $repo ); + $this->assertFileProperties( $file, $props ); + } + + /** + * @covers LocalFile + */ + public function testUpload() { + $repo = new LocalRepo( + [ + 'class' => LocalRepo::class, + 'name' => 'test', + 'backend' => new FSFileBackend( [ + 
'name' => 'test-backend', + 'wikiId' => WikiMap::getCurrentWikiId(), + 'basePath' => $this->getNewTempDirectory() + ] ) + ] + ); + $title = Title::newFromText( 'File:Test.jpg' ); + $file = new LocalFile( $title, $repo ); + $path = __DIR__ . '/../../../data/media/test.jpg'; + $status = $file->upload( + $path, + 'comment', + 'page text', + 0 + ); + $this->assertSame( [], $status->getErrors() ); + + // Test reupload + $file = new LocalFile( $title, $repo ); + $path = __DIR__ . '/../../../data/media/jpeg-xmp-nullchar.jpg'; + $status = $file->upload( + $path, + 'comment', + 'page text', + 0 + ); + $this->assertSame( [], $status->getErrors() ); + } }