media: Port DjVuImage::retrieveMetaData() to use BoxedCommand
The shellouts to djvudump and djvutxt are combined in a wrapper shell script, scripts/retrieveMetaData.sh, so the djvu file only needs to be transmitted to a Shellbox service once. Windows users can change the new $wgDjvuShell setting to point to a different shell. Bug: T289228 Change-Id: Ic4e3dff246792692a952da0e8f9b4a83d1b6b963
This commit is contained in:
parent
6e4f771462
commit
9834e56f38
4 changed files with 78 additions and 15 deletions
|
|
@ -32,6 +32,9 @@ For notes on 1.36.x and older releases, see HISTORY.
|
|||
* $wgMaxImageArea - This setting may now be set to false to disable size
|
||||
checking before scaling. Extensions can still override its value by
|
||||
using the BitmapHandlerCheckImageArea hook.
|
||||
* $wgDjvuDump, $wgDjvuRenderer and $wgDjvuTxt now only accept an explicit path.
|
||||
'djvudump' will not work and must be changed to '/usr/bin/djvudump'
|
||||
for example.
|
||||
* …
|
||||
|
||||
==== Removed configuration ====
|
||||
|
|
|
|||
|
|
@ -1790,21 +1790,21 @@ $wgImagePreconnect = false;
|
|||
/**
|
||||
* Path of the djvudump executable
|
||||
* Enable this and $wgDjvuRenderer to enable djvu rendering
|
||||
* example: $wgDjvuDump = 'djvudump';
|
||||
* example: $wgDjvuDump = '/usr/bin/djvudump';
|
||||
*/
|
||||
$wgDjvuDump = null;
|
||||
|
||||
/**
|
||||
* Path of the ddjvu DJVU renderer
|
||||
* Enable this and $wgDjvuDump to enable djvu rendering
|
||||
* example: $wgDjvuRenderer = 'ddjvu';
|
||||
* example: $wgDjvuRenderer = '/usr/bin/ddjvu';
|
||||
*/
|
||||
$wgDjvuRenderer = null;
|
||||
|
||||
/**
|
||||
* Path of the djvutxt DJVU text extraction utility
|
||||
* Enable this and $wgDjvuDump to enable text layer extraction from djvu files
|
||||
* example: $wgDjvuTxt = 'djvutxt';
|
||||
* example: $wgDjvuTxt = '/usr/bin/djvutxt';
|
||||
*/
|
||||
$wgDjvuTxt = null;
|
||||
|
||||
|
|
@ -1820,6 +1820,14 @@ $wgDjvuPostProcessor = 'pnmtojpeg';
|
|||
*/
|
||||
$wgDjvuOutputExtension = 'jpg';
|
||||
|
||||
/**
|
||||
* Path of the shell used to run the script that retrieves djvu metadata
|
||||
*
|
||||
* @var string
|
||||
* @since 1.37
|
||||
*/
|
||||
$wgDjvuShell = '/bin/sh';
|
||||
|
||||
// endregion -- end of DJvu
|
||||
|
||||
// endregion -- end of file uploads
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@
|
|||
* @ingroup Media
|
||||
*/
|
||||
|
||||
use MediaWiki\Shell\Shell;
|
||||
use MediaWiki\MediaWikiServices;
|
||||
|
||||
/**
|
||||
* Support for detecting/validating DjVu image files and getting
|
||||
|
|
@ -249,28 +249,51 @@ class DjVuImage {
|
|||
* @return string|null|false
|
||||
*/
|
||||
public function retrieveMetaData() {
|
||||
global $wgDjvuDump, $wgDjvuTxt;
|
||||
global $wgDjvuDump, $wgDjvuTxt, $wgDjvuShell;
|
||||
|
||||
if ( !$this->isValid() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( isset( $wgDjvuDump ) ) {
|
||||
# djvudump is faster than djvutoxml (now abandoned) as of version 3.5
|
||||
# https://sourceforge.net/p/djvu/bugs/71/
|
||||
$cmd = Shell::escape( $wgDjvuDump ) . ' ' . Shell::escape( $this->mFilename );
|
||||
$dump = wfShellExec( $cmd );
|
||||
$command = MediaWikiServices::getInstance()->getShellCommandFactory()
|
||||
->createBoxed( 'media' )
|
||||
->disableNetwork()
|
||||
->firejailDefaultSeccomp()
|
||||
->routeName( 'djvu-metadata' );
|
||||
$command
|
||||
->params( $wgDjvuShell, 'scripts/retrieveMetaData.sh' )
|
||||
->inputFileFromFile(
|
||||
'scripts/retrieveMetaData.sh',
|
||||
__DIR__ . '/scripts/retrieveMetaData.sh' )
|
||||
->inputFileFromFile( 'file.djvu', $this->mFilename )
|
||||
->memoryLimit( self::DJVUTXT_MEMORY_LIMIT );
|
||||
|
||||
$env = [];
|
||||
if ( $wgDjvuDump !== null ) {
|
||||
$env['DJVU_DUMP'] = $wgDjvuDump;
|
||||
$command->outputFileToString( 'dump' );
|
||||
}
|
||||
if ( $wgDjvuTxt !== null ) {
|
||||
$env['DJVU_TXT'] = $wgDjvuTxt;
|
||||
$command->outputFileToString( 'txt' );
|
||||
$command->outputFileToString( 'txt_exit_code' );
|
||||
}
|
||||
|
||||
$result = $command
|
||||
->environment( $env )
|
||||
->execute();
|
||||
if ( $wgDjvuDump !== null ) {
|
||||
$dump = $result->getFileContents( 'dump' );
|
||||
$xml = $this->convertDumpToXML( $dump );
|
||||
} else {
|
||||
$xml = null;
|
||||
}
|
||||
|
||||
# Text layer
|
||||
if ( isset( $wgDjvuTxt ) ) {
|
||||
$cmd = Shell::escape( $wgDjvuTxt ) . ' --detail=page ' . Shell::escape( $this->mFilename );
|
||||
wfDebug( __METHOD__ . ": $cmd" );
|
||||
$retval = '';
|
||||
$txt = wfShellExec( $cmd, $retval, [], [ 'memory' => self::DJVUTXT_MEMORY_LIMIT ] );
|
||||
if ( $wgDjvuTxt !== null ) {
|
||||
$retval = (int)trim( $result->getFileContents( 'txt_exit_code' ) );
|
||||
if ( $retval == 0 ) {
|
||||
$txt = $result->getFileContents( 'txt' );
|
||||
# Strip some control characters
|
||||
# Ignore carriage returns
|
||||
$txt = preg_replace( "/\\\\013/", "", $txt );
|
||||
|
|
|
|||
29
includes/media/scripts/retrieveMetaData.sh
Normal file
29
includes/media/scripts/retrieveMetaData.sh
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
#!/bin/sh
#
# Collect DjVu metadata for DjVuImage::retrieveMetaData() in a single run.
#
# The input file is expected in the working directory as "file.djvu".
# Results are written to files in the same directory:
#   dump           standard output of djvudump
#   txt            standard output of djvutxt --detail=page (text layer)
#   txt_exit_code  exit status of djvutxt, or 127 if it could not be run
#
# Parameters from the environment:
#   DJVU_DUMP      path of the djvudump executable (default: djvudump)
#   DJVU_TXT       path of the djvutxt executable (default: djvutxt)
#
# A tool is only run when its path names an executable file. The check is
# done with "test -x", so a bare command name is resolved relative to the
# working directory, not looked up in PATH.

# Get parameters from environment

export DJVU_DUMP="${DJVU_DUMP:-djvudump}"
export DJVU_TXT="${DJVU_TXT:-djvutxt}"

# Write the djvudump output for file.djvu to "dump".
runDump() {
	# djvudump is faster than djvutoxml (now abandoned) as of version 3.5
	# https://sourceforge.net/p/djvu/bugs/71/
	"$DJVU_DUMP" file.djvu > dump
}

# Write the text layer of file.djvu to "txt" and the exit status of djvutxt
# to "txt_exit_code".
runTxt() {
	# Text layer
	"$DJVU_TXT" \
		--detail=page \
		file.djvu > txt
	# Store exit code so we can use it later
	echo $? > txt_exit_code
}

if [ -x "$DJVU_DUMP" ]; then
	runDump
fi

if [ -x "$DJVU_TXT" ]; then
	runTxt
else
	# Without this, a missing or non-executable djvutxt leaves no
	# txt_exit_code file at all, and the caller cannot tell that apart from
	# a successful run. 127 is the shell's "command not found" status.
	echo 127 > txt_exit_code
fi
|
||||
Loading…
Reference in a new issue