Add Special:ListDuplicatedFiles expensive query special page.
I saw some comments recently on Commons suggesting that better ways are needed to manage duplicate files (there are tools for checking whether a specific file is a duplicate, but no backlog of outstanding duplicate files). This seems like a fairly easy first step in that direction. I wasn't sure whether this should be an image gallery type query page, or just a list. I think in this case a plain list is more useful. Change-Id: Ibe4b9da71ca6451ec4e6b0050feaf3ca70e1b888
This commit is contained in:
parent
c59a60aecc
commit
24276faf68
7 changed files with 131 additions and 0 deletions
|
|
@ -953,6 +953,7 @@ $wgAutoloadLocalClasses = array(
|
|||
'IPBlockForm' => 'includes/specials/SpecialBlock.php',
|
||||
'LinkSearchPage' => 'includes/specials/SpecialLinkSearch.php',
|
||||
'ListredirectsPage' => 'includes/specials/SpecialListredirects.php',
|
||||
'ListDuplicatedFilesPage' => 'includes/specials/SpecialListDuplicatedFiles.php',
|
||||
'LoginForm' => 'includes/specials/SpecialUserlogin.php',
|
||||
'LonelyPagesPage' => 'includes/specials/SpecialLonelypages.php',
|
||||
'LongPagesPage' => 'includes/specials/SpecialLongpages.php',
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ abstract class QueryPage extends SpecialPage {
|
|||
array( 'DeadendPagesPage', 'Deadendpages' ),
|
||||
array( 'DoubleRedirectsPage', 'DoubleRedirects' ),
|
||||
array( 'FileDuplicateSearchPage', 'FileDuplicateSearch' ),
|
||||
array( 'ListDuplicatedFilesPage', 'ListDuplicatedFiles'),
|
||||
array( 'LinkSearchPage', 'LinkSearch' ),
|
||||
array( 'ListredirectsPage', 'Listredirects' ),
|
||||
array( 'LonelyPagesPage', 'Lonelypages' ),
|
||||
|
|
|
|||
|
|
@ -117,6 +117,7 @@ class SpecialPageFactory {
|
|||
'FileDuplicateSearch' => 'FileDuplicateSearchPage',
|
||||
'Upload' => 'SpecialUpload',
|
||||
'UploadStash' => 'SpecialUploadStash',
|
||||
'ListDuplicatedFiles' => 'ListDuplicatedFilesPage',
|
||||
|
||||
// Data and tools
|
||||
'Statistics' => 'SpecialStatistics',
|
||||
|
|
|
|||
112
includes/specials/SpecialListDuplicatedFiles.php
Normal file
112
includes/specials/SpecialListDuplicatedFiles.php
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
<?php
|
||||
/**
|
||||
* Implements Special:ListDuplicatedFiles
|
||||
*
|
||||
* Copyright © 2013 Brian Wolff
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
* @file
|
||||
* @ingroup SpecialPage
|
||||
* @author Brian Wolff
|
||||
*/
|
||||
|
||||
/**
|
||||
* Special:ListDuplicatedFiles Lists all files where the current version is
|
||||
* a duplicate of the current version of some other file.
|
||||
* @ingroup SpecialPage
|
||||
*/
|
||||
class ListDuplicatedFilesPage extends QueryPage {
	/**
	 * @param string $name Name of the special page, passed to the parent constructor
	 */
	public function __construct( $name = 'ListDuplicatedFiles' ) {
		parent::__construct( $name );
	}

	/**
	 * The query groups over the whole image table, so it is flagged as expensive.
	 *
	 * @return bool Always true
	 */
	public function isExpensive() {
		return true;
	}

	/**
	 * @return bool Always false
	 */
	public function isSyndicated() {
		return false;
	}

	/**
	 * Get all the duplicates by grouping on sha1s.
	 *
	 * A cheaper (but less useful) version of this
	 * query would be to not care how many duplicates a
	 * particular file has, and do a self-join on image table.
	 * However this version should be no more expensive than
	 * Special:MostLinked, which seems to get handled fine
	 * with however we are doing cached special pages.
	 *
	 * Each result row represents one group of files sharing a sha1:
	 * 'title' is the alphabetically first file name in the group and
	 * 'value' is the number of files in the group (always at least 2).
	 *
	 * @return array Query description with 'tables', 'fields' and 'options' keys
	 */
	public function getQueryInfo() {
		return array(
			'tables' => array( 'image' ),
			'fields' => array(
				'namespace' => NS_FILE,
				'title' => 'MIN(img_name)',
				'value' => 'count(*)'
			),
			'options' => array(
				'GROUP BY' => 'img_sha1',
				'HAVING' => 'count(*) > 1',
			),
		);
	}

	/**
	 * Pre-fill the link cache
	 *
	 * @param DatabaseBase $db
	 * @param ResultWrapper $res
	 */
	public function preprocessResults( $db, $res ) {
		if ( $res->numRows() > 0 ) {
			$linkBatch = new LinkBatch();

			foreach ( $res as $row ) {
				$linkBatch->add( $row->namespace, $row->title );
			}

			// The loop above consumed the result set; rewind it to the first row.
			$res->seek( 0 );
			$linkBatch->execute();
		}
	}

	/**
	 * Format one result row as a parsed 'listduplicatedfiles-entry' message.
	 *
	 * Message parameters: $1 is the file name without namespace prefix,
	 * $2 is the number of duplicates the file has, and $3 is the prefixed
	 * name of the Special:FileDuplicateSearch subpage for the file.
	 *
	 * @param Skin $skin
	 * @param object $result Result row
	 * @return string
	 */
	public function formatResult( $skin, $result ) {
		// Future version might include a list of the first 5 duplicates
		// perhaps separated by an "↔".
		$image1 = Title::makeTitle( $result->namespace, $result->title );
		$dupeSearch = SpecialPage::getTitleFor( 'FileDuplicateSearch', $image1->getDBkey() );

		// Use the page's own message context rather than the global
		// wfMessage(), so the entry follows the request being served.
		$msg = $this->msg( 'listduplicatedfiles-entry' )
			->params( $image1->getText() )
			// 'value' counts every file in the sha1 group, including this one.
			->numParams( $result->value - 1 )
			->params( $dupeSearch->getPrefixedDBkey() );

		return $msg->parse();
	}

	/**
	 * @return string Always 'media'
	 */
	protected function getGroupName() {
		return 'media';
	}
}
|
||||
|
|
@ -423,6 +423,7 @@ $specialPageAliases = array(
|
|||
'Listfiles' => array( 'ListFiles', 'FileList', 'ImageList' ),
|
||||
'Listgrouprights' => array( 'ListGroupRights', 'UserGroupRights' ),
|
||||
'Listredirects' => array( 'ListRedirects' ),
|
||||
'ListDuplicatedFiles' => array( 'ListDuplicatedFiles', 'ListFileDuplicates' ),
|
||||
'Listusers' => array( 'ListUsers', 'UserList' ),
|
||||
'Lockdb' => array( 'LockDB' ),
|
||||
'Log' => array( 'Log', 'Logs' ),
|
||||
|
|
@ -2641,6 +2642,10 @@ Input: contenttype/subtype, e.g. <code>image/jpeg</code>.',
|
|||
'listredirects' => 'List of redirects',
|
||||
'listredirects-summary' => '', # do not translate or duplicate this message to other languages
|
||||
|
||||
'listduplicatedfiles' => 'List of files with duplicates',
|
||||
'listduplicatedfiles-summary' => 'This is a list of files where the most recent version of the file is a duplicate of the most recent version of some other file. Only local files are considered.',
|
||||
'listduplicatedfiles-entry' => '[[:File:$1|$1]] has [[$3|{{PLURAL:$2|a duplicate|$2 duplicates}}]].',
|
||||
|
||||
# Unused templates
|
||||
'unusedtemplates' => 'Unused templates',
|
||||
'unusedtemplates-summary' => '', # do not translate or duplicate this message to other languages
|
||||
|
|
|
|||
|
|
@ -4895,6 +4895,11 @@ See also:
|
|||
# List redirects
|
||||
'listredirects' => '{{doc-special|ListRedirects}}',
|
||||
|
||||
# List duplicates
|
||||
'listduplicatedfiles' => '{{doc-special|ListDuplicatedFiles}}',
|
||||
'listduplicatedfiles-summary' => 'Summary at top of Special:ListDuplicatedFiles',
|
||||
'listduplicatedfiles-entry' => 'A list item on Special:ListDuplicatedFiles. $1 is the file name (no namespace prefix). $2 is the number of duplicates this file has. $3 is the name of the duplicate search page (aka "Special:FileDuplicateSearch/Foo.png" or "Spécial:Recherche_fichier_en_double/Firefox.png")',
|
||||
|
||||
# Unused templates
|
||||
'unusedtemplates' => '{{doc-special|UnusedTemplates}}',
|
||||
'unusedtemplatestext' => 'Shown on top of [[Special:Unusedtemplates]]',
|
||||
|
|
|
|||
|
|
@ -1690,6 +1690,11 @@ $wgMessageStructure = array(
|
|||
'listredirects',
|
||||
'listredirects-summary',
|
||||
),
|
||||
'listduplicatedfiles' => array(
|
||||
'listduplicatedfiles',
|
||||
'listduplicatedfiles-summary',
|
||||
'listduplicatedfiles-entry',
|
||||
),
|
||||
'unusedtemplates' => array(
|
||||
'unusedtemplates',
|
||||
'unusedtemplates-summary',
|
||||
|
|
@ -4103,6 +4108,7 @@ future releases. Also note that since each list value is wrapped in a unique
|
|||
'filedelete' => 'File deletion',
|
||||
'mimesearch' => 'MIME search',
|
||||
'unwatchedpages' => 'Unwatched pages',
|
||||
'listduplicatedfiles' => 'List duplicated files special page',
|
||||
'listredirects' => 'List redirects',
|
||||
'unusedtemplates' => 'Unused templates',
|
||||
'randompage' => 'Random page',
|
||||
|
|
|
|||
Loading…
Reference in a new issue