Add Special:ListDuplicatedFiles expensive query special page.

I saw some comments recently on Commons suggesting that
better ways are needed to manage duplicate files (there are tools
for checking whether a specific file is a duplicate, but there is
no backlog listing the outstanding duplicate files).

This seems like a fairly easy first step in that direction.

Wasn't sure if this should be an image gallery type
query page, or just a list. I think in this case a plain
list is more useful.

Change-Id: Ibe4b9da71ca6451ec4e6b0050feaf3ca70e1b888
This commit is contained in:
Brian Wolff 2013-09-21 16:08:59 -03:00
parent c59a60aecc
commit 24276faf68
7 changed files with 131 additions and 0 deletions

View file

@ -953,6 +953,7 @@ $wgAutoloadLocalClasses = array(
'IPBlockForm' => 'includes/specials/SpecialBlock.php',
'LinkSearchPage' => 'includes/specials/SpecialLinkSearch.php',
'ListredirectsPage' => 'includes/specials/SpecialListredirects.php',
'ListDuplicatedFilesPage' => 'includes/specials/SpecialListDuplicatedFiles.php',
'LoginForm' => 'includes/specials/SpecialUserlogin.php',
'LonelyPagesPage' => 'includes/specials/SpecialLonelypages.php',
'LongPagesPage' => 'includes/specials/SpecialLongpages.php',

View file

@ -77,6 +77,7 @@ abstract class QueryPage extends SpecialPage {
array( 'DeadendPagesPage', 'Deadendpages' ),
array( 'DoubleRedirectsPage', 'DoubleRedirects' ),
array( 'FileDuplicateSearchPage', 'FileDuplicateSearch' ),
array( 'ListDuplicatedFilesPage', 'ListDuplicatedFiles'),
array( 'LinkSearchPage', 'LinkSearch' ),
array( 'ListredirectsPage', 'Listredirects' ),
array( 'LonelyPagesPage', 'Lonelypages' ),

View file

@ -117,6 +117,7 @@ class SpecialPageFactory {
'FileDuplicateSearch' => 'FileDuplicateSearchPage',
'Upload' => 'SpecialUpload',
'UploadStash' => 'SpecialUploadStash',
'ListDuplicatedFiles' => 'ListDuplicatedFilesPage',
// Data and tools
'Statistics' => 'SpecialStatistics',

View file

@ -0,0 +1,112 @@
<?php
/**
* Implements Special:ListDuplicatedFiles
*
* Copyright © 2013 Brian Wolff
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup SpecialPage
* @author Brian Wolff
*/
/**
 * Special:ListDuplicatedFiles Lists all files where the current version is
 * a duplicate of the current version of some other file.
 *
 * Each result row represents one group of files sharing the same img_sha1;
 * the group is represented by the alphabetically first file name in it.
 *
 * @ingroup SpecialPage
 */
class ListDuplicatedFilesPage extends QueryPage {
	/**
	 * @param string $name Name of the special page, passed to the parent constructor
	 */
	public function __construct( $name = 'ListDuplicatedFiles' ) {
		parent::__construct( $name );
	}

	/**
	 * Flag this query as expensive (it groups over the whole image table).
	 *
	 * @return bool Always true
	 */
	public function isExpensive() {
		return true;
	}

	/**
	 * This page does not offer a feed.
	 *
	 * @return bool Always false
	 */
	public function isSyndicated() {
		return false;
	}

	/**
	 * Get all the duplicates by grouping on sha1s.
	 *
	 * A cheaper (but less useful) version of this
	 * query would be to not care how many duplicates a
	 * particular file has, and do a self-join on image table.
	 * However this version should be no more expensive than
	 * Special:MostLinked, which seems to get handled fine
	 * with however we are doing cached special pages.
	 *
	 * @return array Query description with 'tables', 'fields' and 'options' keys.
	 *  'title' is the smallest img_name in each sha1 group and 'value' is the
	 *  number of files in that group (always greater than 1).
	 */
	public function getQueryInfo() {
		return array(
			'tables' => array( 'image' ),
			'fields' => array(
				'namespace' => NS_FILE,
				'title' => 'MIN(img_name)',
				'value' => 'count(*)'
			),
			'options' => array(
				'GROUP BY' => 'img_sha1',
				// Only keep hashes shared by at least two files
				'HAVING' => 'count(*) > 1',
			),
		);
	}

	/**
	 * Pre-fill the link cache
	 *
	 * Collects every row's namespace/title into a single LinkBatch and
	 * executes it, then leaves the result set rewound to its first row.
	 *
	 * @param DatabaseBase $db
	 * @param ResultWrapper $res
	 */
	public function preprocessResults( $db, $res ) {
		if ( $res->numRows() > 0 ) {
			$linkBatch = new LinkBatch();
			foreach ( $res as $row ) {
				$linkBatch->add( $row->namespace, $row->title );
			}
			// Rewind so the rows can be iterated again after this method
			$res->seek( 0 );
			$linkBatch->execute();
		}
	}

	/**
	 * Format one result row as a parsed 'listduplicatedfiles-entry' message.
	 *
	 * Message parameters: $1 is the file name without namespace prefix,
	 * $2 is the number of other files sharing the hash, and $3 is the
	 * prefixed title of Special:FileDuplicateSearch for this file.
	 *
	 * @param Skin $skin
	 * @param object $result Result row
	 * @return string
	 */
	public function formatResult( $skin, $result ) {
		// Future version might include a list of the first 5 duplicates
		// perhaps separated by an "↔".
		$image1 = Title::makeTitle( $result->namespace, $result->title );
		$dupeSearch = SpecialPage::getTitleFor( 'FileDuplicateSearch', $image1->getDBkey() );

		// Use the special page's own message context instead of the
		// global wfMessage(), so the entry follows this page's context.
		$msg = $this->msg( 'listduplicatedfiles-entry' )
			->params( $image1->getText() )
			// The row counts the file itself, so subtract it to get its duplicates
			->numParams( $result->value - 1 )
			->params( $dupeSearch->getPrefixedDBkey() );

		return $msg->parse();
	}

	/**
	 * @return string Group name for this special page: 'media'
	 */
	protected function getGroupName() {
		return 'media';
	}
}

View file

@ -423,6 +423,7 @@ $specialPageAliases = array(
'Listfiles' => array( 'ListFiles', 'FileList', 'ImageList' ),
'Listgrouprights' => array( 'ListGroupRights', 'UserGroupRights' ),
'Listredirects' => array( 'ListRedirects' ),
'ListDuplicatedFiles' => array( 'ListDuplicatedFiles', 'ListFileDuplicates' ),
'Listusers' => array( 'ListUsers', 'UserList' ),
'Lockdb' => array( 'LockDB' ),
'Log' => array( 'Log', 'Logs' ),
@ -2641,6 +2642,10 @@ Input: contenttype/subtype, e.g. <code>image/jpeg</code>.',
'listredirects' => 'List of redirects',
'listredirects-summary' => '', # do not translate or duplicate this message to other languages
'listduplicatedfiles' => 'List of files with duplicates',
'listduplicatedfiles-summary' => 'This is a list of files where the most recent version of the file is a duplicate of the most recent version of some other file. Only local files are considered.',
'listduplicatedfiles-entry' => '[[:File:$1|$1]] has [[$3|{{PLURAL:$2|a duplicate|$2 duplicates}}]].',
# Unused templates
'unusedtemplates' => 'Unused templates',
'unusedtemplates-summary' => '', # do not translate or duplicate this message to other languages

View file

@ -4895,6 +4895,11 @@ See also:
# List redirects
'listredirects' => '{{doc-special|ListRedirects}}',
# List duplicates
'listduplicatedfiles' => '{{doc-special|ListDuplicatedFiles}}',
'listduplicatedfiles-summary' => 'Summary at top of Special:ListDuplicatedFiles',
'listduplicatedfiles-entry' => 'A list item on Special:ListDuplicatedFiles. $1 is the file name (no namespace prefix). $2 is the number of duplicates this file has. $3 is the name of the duplicate search page (aka "Special:FileDuplicateSearch/Foo.png" or "Spécial:Recherche_fichier_en_double/Firefox.png")',
# Unused templates
'unusedtemplates' => '{{doc-special|UnusedTemplates}}',
'unusedtemplatestext' => 'Shown on top of [[Special:Unusedtemplates]]',

View file

@ -1690,6 +1690,11 @@ $wgMessageStructure = array(
'listredirects',
'listredirects-summary',
),
'listduplicatedfiles' => array(
'listduplicatedfiles',
'listduplicatedfiles-summary',
'listduplicatedfiles-entry',
),
'unusedtemplates' => array(
'unusedtemplates',
'unusedtemplates-summary',
@ -4103,6 +4108,7 @@ future releases. Also note that since each list value is wrapped in a unique
'filedelete' => 'File deletion',
'mimesearch' => 'MIME search',
'unwatchedpages' => 'Unwatched pages',
'listduplicatedfiles' => 'List duplicated files special page',
'listredirects' => 'List redirects',
'unusedtemplates' => 'Unused templates',
'randompage' => 'Random page',