media: Port DjVuImage::retrieveMetaData() to use BoxedCommand

The shellouts to djvudump and djvutxt are combined into a single wrapping
shell script, scripts/retrieveMetaData.sh, so the djvu file only needs to be
transmitted to a Shellbox service once.

Windows users can change the new $wgDjvuShell setting to point to a
different shell.

Bug: T289228
Change-Id: Ic4e3dff246792692a952da0e8f9b4a83d1b6b963
This commit is contained in:
Kunal Mehta 2021-09-10 15:28:22 -07:00 committed by Amir Sarabadani
parent 6e4f771462
commit 9834e56f38
4 changed files with 78 additions and 15 deletions

View file

@ -32,6 +32,9 @@ For notes on 1.36.x and older releases, see HISTORY.
* $wgMaxImageArea - This setting may now be set to false to disable size
checking before scaling. Extensions can still override its value by
using the BitmapHandlerCheckImageArea hook.
* $wgDjvuDump, $wgDjvuRenderer, $wgDjvuTxt now only accept an explicit path.
A bare command name such as 'djvudump' will no longer work and must be
changed to a full path, for example '/usr/bin/djvudump'.
* …
==== Removed configuration ====

View file

@ -1790,21 +1790,21 @@ $wgImagePreconnect = false;
/**
* Path of the djvudump executable
* Enable this and $wgDjvuRenderer to enable djvu rendering
* example: $wgDjvuDump = 'djvudump';
* example: $wgDjvuDump = '/usr/bin/djvudump';
*/
$wgDjvuDump = null;
/**
* Path of the ddjvu DJVU renderer
* Enable this and $wgDjvuDump to enable djvu rendering
* example: $wgDjvuRenderer = 'ddjvu';
* example: $wgDjvuRenderer = '/usr/bin/ddjvu';
*/
$wgDjvuRenderer = null;
/**
* Path of the djvutxt DJVU text extraction utility
* Enable this and $wgDjvuDump to enable text layer extraction from djvu files
* example: $wgDjvuTxt = 'djvutxt';
* example: $wgDjvuTxt = '/usr/bin/djvutxt';
*/
$wgDjvuTxt = null;
@ -1820,6 +1820,14 @@ $wgDjvuPostProcessor = 'pnmtojpeg';
*/
$wgDjvuOutputExtension = 'jpg';
/**
* Path of the shell used to run the wrapper script that collects djvu
* metadata. The script is passed to this shell as its first argument and
* in turn invokes the configured djvudump and djvutxt executables.
*
* Defaults to '/bin/sh'. Installations where that path is not usable
* (for example on Windows) can point this setting at a different shell.
*
* @var string
* @since 1.37
*/
$wgDjvuShell = '/bin/sh';
// endregion -- end of DJvu
// endregion -- end of file uploads

View file

@ -24,7 +24,7 @@
* @ingroup Media
*/
use MediaWiki\Shell\Shell;
use MediaWiki\MediaWikiServices;
/**
* Support for detecting/validating DjVu image files and getting
@ -249,28 +249,51 @@ class DjVuImage {
* @return string|null|false
*/
public function retrieveMetaData() {
global $wgDjvuDump, $wgDjvuTxt;
global $wgDjvuDump, $wgDjvuTxt, $wgDjvuShell;
if ( !$this->isValid() ) {
return false;
}
if ( isset( $wgDjvuDump ) ) {
# djvudump is faster than djvutoxml (now abandoned) as of version 3.5
# https://sourceforge.net/p/djvu/bugs/71/
$cmd = Shell::escape( $wgDjvuDump ) . ' ' . Shell::escape( $this->mFilename );
$dump = wfShellExec( $cmd );
$command = MediaWikiServices::getInstance()->getShellCommandFactory()
->createBoxed( 'media' )
->disableNetwork()
->firejailDefaultSeccomp()
->routeName( 'djvu-metadata' );
$command
->params( $wgDjvuShell, 'scripts/retrieveMetaData.sh' )
->inputFileFromFile(
'scripts/retrieveMetaData.sh',
__DIR__ . '/scripts/retrieveMetaData.sh' )
->inputFileFromFile( 'file.djvu', $this->mFilename )
->memoryLimit( self::DJVUTXT_MEMORY_LIMIT );
$env = [];
if ( $wgDjvuDump !== null ) {
$env['DJVU_DUMP'] = $wgDjvuDump;
$command->outputFileToString( 'dump' );
}
if ( $wgDjvuTxt !== null ) {
$env['DJVU_TXT'] = $wgDjvuTxt;
$command->outputFileToString( 'txt' );
$command->outputFileToString( 'txt_exit_code' );
}
$result = $command
->environment( $env )
->execute();
if ( $wgDjvuDump !== null ) {
$dump = $result->getFileContents( 'dump' );
$xml = $this->convertDumpToXML( $dump );
} else {
$xml = null;
}
# Text layer
if ( isset( $wgDjvuTxt ) ) {
$cmd = Shell::escape( $wgDjvuTxt ) . ' --detail=page ' . Shell::escape( $this->mFilename );
wfDebug( __METHOD__ . ": $cmd" );
$retval = '';
$txt = wfShellExec( $cmd, $retval, [], [ 'memory' => self::DJVUTXT_MEMORY_LIMIT ] );
if ( $wgDjvuTxt !== null ) {
$retval = (int)trim( $result->getFileContents( 'txt_exit_code' ) );
if ( $retval == 0 ) {
$txt = $result->getFileContents( 'txt' );
# Strip some control characters
# Ignore carriage returns
$txt = preg_replace( "/\\\\013/", "", $txt );

View file

@ -0,0 +1,29 @@
#!/bin/sh
# Collect DjVu metadata for the file named "file.djvu" in the current
# working directory.
#
# Inputs (environment variables):
#   DJVU_DUMP  path of the djvudump executable (default: djvudump)
#   DJVU_TXT   path of the djvutxt executable (default: djvutxt)
#
# Outputs (files written to the current working directory):
#   dump           output of djvudump, only when DJVU_DUMP is executable
#   txt            output of djvutxt, only when DJVU_TXT is executable
#   txt_exit_code  exit status of djvutxt, or 127 when DJVU_TXT could not
#                  be executed at all
#
# Each tool is gated on `[ -x ... ]`, which tests the value as a pathname and
# does not search PATH. A bare command name is therefore only accepted if a
# file of that name exists in the working directory; callers should pass an
# explicit path.

# Get parameters from environment
export DJVU_DUMP="${DJVU_DUMP:-djvudump}"
export DJVU_TXT="${DJVU_TXT:-djvutxt}"

# Write the structure dump of file.djvu to "dump".
runDump() {
	# djvudump is faster than djvutoxml (now abandoned) as of version 3.5
	# https://sourceforge.net/p/djvu/bugs/71/
	"$DJVU_DUMP" file.djvu > dump
}

# Write the page-level text layer of file.djvu to "txt" and the exit status
# of djvutxt to "txt_exit_code".
runTxt() {
	# Text layer
	"$DJVU_TXT" \
		--detail=page \
		file.djvu > txt
	# Store exit code so we can use it later; this must stay directly after
	# the djvutxt invocation so that $? still refers to it.
	echo $? > txt_exit_code
}

if [ -x "$DJVU_DUMP" ]; then
	runDump
fi

if [ -x "$DJVU_TXT" ]; then
	runTxt
else
	# Without this, txt_exit_code would be missing and a caller that casts
	# the (empty) contents to an integer would read it as 0, i.e. success.
	# 127 is the conventional shell status for a command that was not found.
	echo 127 > txt_exit_code
fi