This is a schema change. It's only a table creation, but the table must be created on Wikimedia servers before this revision goes live. The maintenance script populateCategory.php should be run when convenient. If it's not run, there's only one substantial case where display will be harmed: the page of a category with more than 200 net pages added since the patch goes live will give an erroneously low count. In all other cases category pages will simply be better worded, because the code recognizes when the count stored in the table is bogus.

* Adds Category and CategoryList classes to represent categories themselves.
* Adds a category table, giving each category a name, ID, and counts of all members, subcats only, and files.
* Adds a maintenance script to populate the category table efficiently.  This script is careful to wait for slaves and should be safe to run on a live database.  The maintenance script's includes file is called by update.php.
* Until the category table is populated, the patch handles weird category table rows gracefully.  It detects whether they're obviously impossible, and if so, it outputs appropriate messages.
This commit is contained in:
Aryeh Gregor 2008-03-18 00:17:28 +00:00
parent b7bf2dd850
commit 80a5874828
12 changed files with 675 additions and 25 deletions

View file

@ -46,6 +46,8 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN
link on diffs
* Magic word formatnum can now take raw suffix to undo formatting
* Add updatelog table to reliably permit updates that don't change the schema
* Add category table to allow better tracking of category membership counts
** (bug 1212) Give correct membership counts on the pages of large categories
=== Bug fixes in 1.13 ===

View file

@ -2259,12 +2259,20 @@ class Article {
# Delete restrictions for it
$dbw->delete( 'page_restrictions', array ( 'pr_page' => $id ), __METHOD__ );
# Fix category table counts
$cats = array();
$res = $dbw->select( 'categorylinks', 'cl_to',
array( 'cl_from' => $id ), __METHOD__ );
foreach( $res as $row ) {
$cats []= $row->cl_to;
}
$this->updateCategoryCounts( array(), $cats, $dbw );
# Now that it's safely backed up, delete it
$dbw->delete( 'page', array( 'page_id' => $id ), __METHOD__);
# If using cascading deletes, we can skip some explicit deletes
if ( !$dbw->cascadingDeletes() ) {
$dbw->delete( 'revision', array( 'rev_page' => $id ), __METHOD__ );
if ($wgUseTrackbacks)
@ -3340,4 +3348,55 @@ class Article {
$wgOut->addParserOutput( $parserOutput );
}
/**
 * Update all the appropriate counts in the category table, given that
 * we've added the categories $added and deleted the categories $deleted.
 *
 * A row is first created (INSERT IGNORE) for every named category, then
 * cat_pages is incremented for $added and decremented for $deleted.  When
 * this page is itself a category or a file, cat_subcats or cat_files is
 * adjusted by the same amount.
 *
 * @param $added array The names of categories that were added
 * @param $deleted array The names of categories that were deleted
 * @param $dbw Database Optional database connection to use; the master
 *   connection is fetched when omitted
 * @return null
 */
public function updateCategoryCounts( $added, $deleted, $dbw = null ) {
	# Nothing was added or removed, so there is nothing to write.
	if( !$added && !$deleted ) {
		return;
	}
	$ns = $this->mTitle->getNamespace();
	if( !$dbw ) {
		$dbw = wfGetDB( DB_MASTER );
	}
	# First make sure the rows exist.  If one of the "deleted" ones didn't
	# exist, we might legitimately not create it, but it's simpler to just
	# create it and then give it a negative value, since the value is bogus
	# anyway.
	#
	# Sometimes I wish we had INSERT ... ON DUPLICATE KEY UPDATE.
	$insertRows = array();
	foreach( array_merge( $added, $deleted ) as $cat ) {
		$insertRows []= array( 'cat_title' => $cat );
	}
	$dbw->insert( 'category', $insertRows, __METHOD__, 'IGNORE' );

	$addFields = array( 'cat_pages = cat_pages + 1' );
	$removeFields = array( 'cat_pages = cat_pages - 1' );
	if( $ns == NS_CATEGORY ) {
		$addFields []= 'cat_subcats = cat_subcats + 1';
		$removeFields []= 'cat_subcats = cat_subcats - 1';
	} elseif( $ns == NS_IMAGE ) {
		$addFields []= 'cat_files = cat_files + 1';
		$removeFields []= 'cat_files = cat_files - 1';
	}

	# Only touch a side that actually has names.  Callers such as page
	# deletion pass an empty $added list, and an empty array cannot be
	# turned into a usable "cat_title IN (...)" condition.
	if( $added ) {
		$dbw->update(
			'category',
			$addFields,
			array( 'cat_title' => $added ),
			__METHOD__
		);
	}
	if( $deleted ) {
		$dbw->update(
			'category',
			$removeFields,
			array( 'cat_title' => $deleted ),
			__METHOD__
		);
	}
}
}

View file

@ -25,7 +25,9 @@ function __autoload($className) {
'BagOStuff' => 'includes/BagOStuff.php',
'Block' => 'includes/Block.php',
'BrokenRedirectsPage' => 'includes/SpecialBrokenRedirects.php',
'Category' => 'includes/Category.php',
'Categoryfinder' => 'includes/Categoryfinder.php',
'CategoryList' => 'includes/Category.php',
'CategoryPage' => 'includes/CategoryPage.php',
'CategoryViewer' => 'includes/CategoryPage.php',
'ChangesList' => 'includes/ChangesList.php',

305
includes/Category.php Normal file
View file

@ -0,0 +1,305 @@
<?php
/**
* Two classes, Category and CategoryList, to deal with categories. To reduce
* code duplication, most of the logic is implemented for lists of categories,
* and then single categories are a special case. We use a separate class for
* CategoryList so as to discourage stupid slow memory-hogging stuff like manu-
* ally iterating through arrays of Titles and Articles, which we do way too
* much, when a smarter class can do stuff all in one query.
*
* Category(List) objects are immutable, strictly speaking. If you call me-
* thods that change the database, like to refresh link counts, the objects
* will be appropriately reinitialized. Member variables are lazy-initialized.
*
* TODO: Move some stuff from CategoryPage.php to here, and use that.
*
* @author Simetrical
*/
abstract class CategoryListBase {
	# FIXME: Is storing all member variables as simple arrays a good idea?
	# Should we use some kind of associative array instead?

	/** Names of all member categories, normalized to DB-key form */
	protected $mNames = null;
	/** IDs of all member categories */
	protected $mIDs = null;
	/**
	 * Counts of membership (cat_pages, cat_subcats, cat_files) for all member
	 * categories
	 */
	protected $mPages = null, $mSubcats = null, $mFiles = null;

	/** Instances are only created through the subclasses' factory functions. */
	protected function __construct() {}

	/**
	 * Store the given names, normalized to DB-key form.  Names that cannot
	 * be turned into a Title are dropped.  See CategoryList::newFromNames
	 * for details.
	 *
	 * @param array $names Category names, not necessarily normalized
	 * @throws MWException if $names is not an array
	 */
	protected function setNames( $names ) {
		if( !is_array( $names ) ) {
			throw new MWException( __METHOD__.' passed non-array' );
		}
		$normalized = array_map(
			array( 'CategoryListBase', 'setNamesCallback' ),
			$names
		);
		# array_diff() removes the false markers left by invalid names but
		# keeps the original keys, so reindex to get a simple 0-based list.
		$this->mNames = array_values( array_diff( $normalized, array( false ) ) );
	}

	/**
	 * @param string $name Name of a putative category
	 * @return mixed Normalized name, or false if the name was invalid.
	 */
	private static function setNamesCallback( $name ) {
		$title = Title::newFromText( $name );
		if( !is_object( $title ) ) {
			return false;
		}
		return $title->getDBKey();
	}

	/**
	 * Set up all member variables using a database query.
	 *
	 * The lookup is by name when only names are known and by ID when only
	 * IDs are known.  On success every member array is replaced by what the
	 * category table returned; on failure the members are left untouched.
	 *
	 * @return bool True on success (or if already initialized), false if no
	 *   matching rows were found.
	 * @throws MWException if neither names nor IDs have been set
	 */
	protected function initialize() {
		if( $this->mNames === null && $this->mIDs === null ) {
			throw new MWException( __METHOD__.' has both names and IDs null' );
		}
		if( $this->mIDs === null ) {
			$lookup = $this->mNames;
			$where = array( 'cat_title' => $lookup );
		} elseif( $this->mNames === null ) {
			$lookup = $this->mIDs;
			$where = array( 'cat_id' => $lookup );
		} else {
			# Already initialized
			return true;
		}
		if( !$lookup ) {
			# An empty list cannot match any row, so don't bother the
			# database with an empty IN() condition.
			return false;
		}

		$dbr = wfGetDB( DB_SLAVE );
		$res = $dbr->select(
			'category',
			array( 'cat_id', 'cat_title', 'cat_pages', 'cat_subcats',
				'cat_files' ),
			$where,
			__METHOD__
		);

		# Collect into locals first so the members stay untouched when the
		# query comes back empty.
		$ids = $names = $pages = $subcats = $files = array();
		while( $row = $res->fetchRow() ) {
			$ids []= $row['cat_id'];
			$names []= $row['cat_title'];
			$pages []= $row['cat_pages'];
			$subcats []= $row['cat_subcats'];
			$files []= $row['cat_files'];
		}
		$res->free();

		if( !$ids ) {
			# Okay, there were no contents.  Nothing to initialize.
			return false;
		}

		$this->mIDs = $ids;
		$this->mNames = $names;
		$this->mPages = $pages;
		$this->mSubcats = $subcats;
		$this->mFiles = $files;
		return true;
	}
}
/** @todo make iterable. */
class CategoryList extends CategoryListBase {
	/**
	 * Factory function.  Any provided elements that don't correspond to a
	 * category that actually exists will be silently dropped.  FIXME: Is
	 * this sane error-handling?
	 *
	 * @param array $names An array of category names.  They need not be
	 *   normalized, with spaces replaced by underscores.
	 * @return CategoryList
	 */
	public static function newFromNames( $names ) {
		$cat = new self();
		$cat->setNames( $names );
		return $cat;
	}

	/**
	 * Factory function.  Any provided elements that don't correspond to a
	 * category that actually exists will be silently dropped.  FIXME: Is
	 * this sane error-handling?
	 *
	 * @param array $ids An array of category ids
	 * @return CategoryList
	 * @throws MWException if $ids is not an array
	 */
	public static function newFromIDs( $ids ) {
		if( !is_array( $ids ) ) {
			throw new MWException( __METHOD__.' passed non-array' );
		}
		$cat = new self();
		# Must be mIDs, the member initialize() reads; property names are
		# case-sensitive.
		$cat->mIDs = $ids;
		return $cat;
	}

	/** @return array Simple array of DB key names */
	public function getNames() {
		$this->initialize();
		return $this->mNames;
	}

	/**
	 * FIXME: Is this a good return type?
	 *
	 * @return array Associative array of DB key name => ID; empty if no
	 *   matching category rows were loaded
	 */
	public function getIDs() {
		$this->initialize();
		if( $this->mNames === null || $this->mIDs === null ) {
			# Nothing was loaded from the category table.
			return array();
		}
		# Pair each name with the ID at the same position.
		$ret = array();
		foreach( array_keys( $this->mNames ) as $i ) {
			$ret[$this->mNames[$i]] = $this->mIDs[$i];
		}
		return $ret;
	}

	/**
	 * FIXME: Is this a good return type?
	 *
	 * @return array Associative array of DB key name => array(pages, subcats,
	 *   files); empty if no matching category rows were loaded
	 */
	public function getCounts() {
		$this->initialize();
		if( $this->mNames === null || $this->mIDs === null ) {
			# Nothing was loaded from the category table.
			return array();
		}
		$ret = array();
		foreach( array_keys( $this->mNames ) as $i ) {
			$ret[$this->mNames[$i]] = array(
				$this->mPages[$i],
				$this->mSubcats[$i],
				$this->mFiles[$i]
			);
		}
		return $ret;
	}
}
class Category extends CategoryListBase {
	/**
	 * Factory function.
	 *
	 * @param string $name A category name (no "Category:" prefix).  It need
	 *   not be normalized, with spaces replaced by underscores.
	 * @return mixed Category, or false on a totally invalid name
	 */
	public static function newFromName( $name ) {
		$cat = new self();
		$cat->setNames( array( $name ) );
		if( count( $cat->mNames ) !== 1 ) {
			return false;
		}
		return $cat;
	}

	/**
	 * Factory function.
	 *
	 * @param int $id A category id
	 * @return Category
	 */
	public static function newFromIDs( $id ) {
		$cat = new self();
		$cat->mIDs = array( $id );
		return $cat;
	}

	/** @return mixed DB key name, or false on failure */
	public function getName() { return $this->getX( 'mNames' ); }
	/** @return mixed Category ID, or false on failure */
	public function getID() { return $this->getX( 'mIDs' ); }
	/** @return mixed Total number of member pages, or false on failure */
	public function getPageCount() { return $this->getX( 'mPages' ); }
	/** @return mixed Number of subcategories, or false on failure */
	public function getSubcatCount() { return $this->getX( 'mSubcats' ); }
	/** @return mixed Number of member files, or false on failure */
	public function getFileCount() { return $this->getX( 'mFiles' ); }

	/**
	 * This is not implemented in the base class, because arrays of Titles are
	 * evil.
	 *
	 * @return mixed The Title for this category, or false on failure.
	 */
	public function getTitle() {
		if( !$this->initialize() ) {
			return false;
		}
		# Build the title from namespace + DB key directly instead of parsing
		# "Category:<name>" as user-entered text.
		return Title::makeTitleSafe( NS_CATEGORY, $this->mNames[0] );
	}

	/**
	 * Generic accessor: first element of the named member array.
	 *
	 * @param string $key Name of the member array to read
	 * @return mixed The value, or false if initialization failed
	 */
	private function getX( $key ) {
		if( !$this->initialize() ) {
			return false;
		}
		return $this->{$key}[0];
	}

	/**
	 * Override the parent class so that we can return false if things muck
	 * up, i.e., the name/ID we got was invalid.  Currently CategoryList
	 * silently eats errors so as not to kill the whole array for one bad name.
	 *
	 * @return bool True on success, false on failure.
	 */
	protected function initialize() {
		parent::initialize();
		# Either member is still null when no row was found; check the type
		# before counting so we never call count() on null.
		return is_array( $this->mNames ) && count( $this->mNames ) == 1
			&& is_array( $this->mIDs ) && count( $this->mIDs ) == 1;
	}

	/**
	 * Refresh the counts for this category.
	 *
	 * Recounts the category's members from categorylinks joined to page and
	 * writes the totals to the category row, then updates the counts cached
	 * on this object.
	 *
	 * FIXME: If there were some way to do this in MySQL 4 without an UPDATE
	 * for every row, it would be nice to move this to the parent class.
	 *
	 * @return bool True on success, false on failure
	 */
	public function refreshCounts() {
		if( wfReadOnly() ) {
			return false;
		}
		# Note, we must use names for this, since categorylinks does.
		# Resolve the name *before* opening the transaction, so a failed
		# lookup cannot leave a transaction dangling on the master.
		$nameWasKnown = ( $this->mNames !== null );
		if( !$nameWasKnown && !$this->initialize() ) {
			return false;
		}

		$dbw = wfGetDB( DB_MASTER );
		$dbw->begin();
		if( $nameWasKnown ) {
			# Let's be sure that the row exists in the table.  We don't need
			# to do this if we got the row from the table in initialization!
			$dbw->insert(
				'category',
				array( 'cat_title' => $this->mNames[0] ),
				__METHOD__,
				'IGNORE'
			);
		}
		$result = $dbw->selectRow(
			array( 'categorylinks', 'page' ),
			array( 'COUNT(*) AS pages',
				'COUNT(IF(page_namespace='.NS_CATEGORY.',1,NULL)) AS subcats',
				'COUNT(IF(page_namespace='.NS_IMAGE.',1,NULL)) AS files'
			),
			array( 'cl_to' => $this->mNames[0], 'page_id = cl_from' ),
			__METHOD__,
			'LOCK IN SHARE MODE'
		);
		$ret = $dbw->update(
			'category',
			array(
				'cat_pages' => $result->pages,
				'cat_subcats' => $result->subcats,
				'cat_files' => $result->files
			),
			array( 'cat_title' => $this->mNames[0] ),
			__METHOD__
		);
		$dbw->commit();

		# Now we should update our local counts.
		$this->mPages = array( $result->pages );
		$this->mSubcats = array( $result->subcats );
		$this->mFiles = array( $result->files );
		return $ret;
	}
}

View file

@ -70,6 +70,8 @@ class CategoryViewer {
$children, $children_start_char,
$showGallery, $gallery,
$skin;
/** Category object for this page */
private $cat;
function __construct( $title, $from = '', $until = '' ) {
global $wgCategoryPagingLimit;
@ -77,6 +79,7 @@ class CategoryViewer {
$this->from = $from;
$this->until = $until;
$this->limit = $wgCategoryPagingLimit;
$this->cat = Category::newFromName( $title->getDBKey() );
}
/**
@ -261,12 +264,14 @@ class CategoryViewer {
function getSubcategorySection() {
# Don't show subcategories section if there are none.
$r = '';
$c = count( $this->children );
if( $c > 0 ) {
$rescnt = count( $this->children );
$dbcnt = $this->cat->getSubcatCount();
$countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'subcat' );
if( $rescnt > 0 ) {
# Showing subcategories
$r .= "<div id=\"mw-subcategories\">\n";
$r .= '<h2>' . wfMsg( 'subcategories' ) . "</h2>\n";
$r .= wfMsgExt( 'subcategorycount', array( 'parse' ), $c );
$r .= $countmsg;
$r .= $this->formatList( $this->children, $this->children_start_char );
$r .= "\n</div>";
}
@ -277,11 +282,20 @@ class CategoryViewer {
$ti = htmlspecialchars( $this->title->getText() );
# Don't show articles section if there are none.
$r = '';
$c = count( $this->articles );
if( $c > 0 ) {
# FIXME, here and in the other two sections: we don't need to bother
# with this rigamarole if the entire category contents fit on one page
# and have already been retrieved. We can just use $rescnt in that
# case and save a query and some logic.
$dbcnt = $this->cat->getPageCount() - $this->cat->getSubcatCount()
- $this->cat->getFileCount();
$rescnt = count( $this->articles );
$countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'article' );
if( $rescnt > 0 ) {
$r = "<div id=\"mw-pages\">\n";
$r .= '<h2>' . wfMsg( 'category_header', $ti ) . "</h2>\n";
$r .= wfMsgExt( 'categoryarticlecount', array( 'parse' ), $c );
$r .= $countmsg;
$r .= $this->formatList( $this->articles, $this->articles_start_char );
$r .= "\n</div>";
}
@ -290,10 +304,13 @@ class CategoryViewer {
function getImageSection() {
if( $this->showGallery && ! $this->gallery->isEmpty() ) {
$dbcnt = $this->cat->getFileCount();
$rescnt = $this->gallery->count();
$countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'file' );
return "<div id=\"mw-category-media\">\n" .
'<h2>' . wfMsg( 'category-media-header', htmlspecialchars($this->title->getText()) ) . "</h2>\n" .
wfMsgExt( 'category-media-count', array( 'parse' ), $this->gallery->count() ) .
$this->gallery->toHTML() . "\n</div>";
$countmsg . $this->gallery->toHTML() . "\n</div>";
} else {
return '';
}
@ -440,6 +457,47 @@ class CategoryViewer {
return "($prevLink) ($nextLink)";
}
/**
 * What to do if the category table conflicts with the number of results
 * returned?  This function says what.  It works the same whether the
 * things being counted are articles, subcategories, or files.
 *
 * Note for grepping: uses the messages category-article-count,
 * category-article-count-limited, category-subcat-count,
 * category-subcat-count-limited, category-file-count,
 * category-file-count-limited.
 *
 * @param int $rescnt The number of items returned by our database query.
 * @param int $dbcnt The number of items according to the category table.
 * @param string $type 'subcat', 'article', or 'file'
 * @return string A message giving the number of items, to output to HTML.
 */
private function getCountMessage( $rescnt, $dbcnt, $type ) {
	# There are three cases:
	#   1) The category table figure seems sane.  It might be wrong, but
	#      we can't do anything about it if we don't recalculate it on
	#      every category view.
	#   2) The category table figure isn't sane, like it's smaller than the
	#      number of actual results, *but* the number of results is less
	#      than $this->limit and there's no offset.  In this case we still
	#      know the right figure.
	#   3) We have no idea.

	# Only consult the gallery when galleries are enabled, the same guard
	# getImageSection() applies before touching it.
	# NOTE(review): presumably $this->gallery is not built when showGallery
	# is off -- confirm against the code that sets it up.
	$galleryCount = $this->showGallery ? $this->gallery->count() : 0;
	$totalrescnt = count( $this->articles ) + count( $this->children ) +
		$galleryCount;

	if( $dbcnt == $rescnt || ( ( $totalrescnt == $this->limit || $this->from
	|| $this->until ) && $dbcnt > $rescnt ) ) {
		# Case 1: seems sane.
		$totalcnt = $dbcnt;
	} elseif( $totalrescnt < $this->limit && !$this->from && !$this->until ) {
		# Case 2: not sane, but salvageable.
		$totalcnt = $rescnt;
	} else {
		# Case 3: hopeless.  Don't give a total count at all.
		return wfMsgExt( "category-$type-count-limited", 'parse', $rescnt );
	}
	return wfMsgExt( "category-$type-count", 'parse', $rescnt, $totalcnt );
}
}

View file

@ -124,8 +124,11 @@ class LinksUpdate {
$this->getCategoryInsertions( $existing ) );
# Invalidate all categories which were added, deleted or changed (set symmetric difference)
$categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing );
$categoryInserts = array_diff_assoc( $this->mCategories, $existing );
$categoryDeletes = array_diff_assoc( $existing, $this->mCategories );
$categoryUpdates = $categoryInserts + $categoryDeletes;
$this->invalidateCategories( $categoryUpdates );
$this->updateCategoryCounts( $categoryInserts, $categoryDeletes );
# Page properties
$existing = $this->getExistingProperties();
@ -155,7 +158,9 @@ class LinksUpdate {
# Refresh category pages and image description pages
$existing = $this->getExistingCategories();
$categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing );
$categoryInserts = array_diff_assoc( $this->mCategories, $existing );
# Categories that exist in the database but are no longer on the page.
$categoryDeletes = array_diff_assoc( $existing, $this->mCategories );
$categoryUpdates = $categoryInserts + $categoryDeletes;
$existing = $this->getExistingImages();
$imageUpdates = array_diff_key( $existing, $this->mImages ) + array_diff_key( $this->mImages, $existing );
@ -167,8 +172,10 @@ class LinksUpdate {
$this->dumbTableUpdate( 'langlinks', $this->getInterlangInsertions(),'ll_from' );
$this->dumbTableUpdate( 'page_props', $this->getPropertyInsertions(), 'pp_page' );
# Update the cache of all the category pages and image description pages which were changed
# Update the cache of all the category pages and image description
# pages which were changed, and fix the category table count
$this->invalidateCategories( $categoryUpdates );
$this->updateCategoryCounts( $categoryInserts, $categoryDeletes );
$this->invalidateImageDescriptions( $imageUpdates );
# Refresh links of all pages including this page
@ -261,6 +268,18 @@ class LinksUpdate {
$this->invalidatePages( NS_CATEGORY, array_keys( $cats ) );
}
/**
 * Adjust the counts in the category table for the categories this page
 * gained and lost.  Only the keys (category names) of the two arrays are
 * used; the work is handed to Article::updateCategoryCounts() together with
 * this update's database connection.
 *
 * @param $added associative array of category name => sort key
 * @param $deleted associative array of category name => sort key
 */
function updateCategoryCounts( $added, $deleted ) {
	$addedNames = array_keys( $added );
	$deletedNames = array_keys( $deleted );
	$article = new Article( $this->mTitle );
	$article->updateCategoryCounts( $addedNames, $deletedNames, $this->mDb );
}
/**
 * Pass the keys of $images to invalidatePages() for the image namespace.
 *
 * @param $images array Only the keys of this array are used
 */
function invalidateImageDescriptions( $images ) {
	$imageKeys = array_keys( $images );
	$this->invalidatePages( NS_IMAGE, $imageKeys );
}
@ -268,9 +287,9 @@ class LinksUpdate {
/**
 * Replace this page's rows in a link table: delete every row whose
 * $fromField equals this page's ID, then insert the supplied rows.
 *
 * @param $table string Name of the table to rewrite
 * @param $insertions array Rows to insert; may be empty
 * @param $fromField string Column holding the source page ID
 */
function dumbTableUpdate( $table, $insertions, $fromField ) {
	$ownRows = array( $fromField => $this->mId );
	$this->mDb->delete( $table, $ownRows, __METHOD__ );
	if ( !count( $insertions ) ) {
		return;
	}
	# The link array was constructed without FOR UPDATE, so there may
	# be collisions.  This may cause minor link table inconsistencies,
	# which is better than crippling the site with lock contention.
	$this->mDb->insert( $table, $insertions, __METHOD__, array( 'IGNORE' ) );
}

View file

@ -2416,16 +2416,20 @@ All transwiki import actions are logged at the [[Special:Log/import|import log]]
'nocredits' => 'There is no credits info available for this page.',
# Spam protection
'spamprotectiontitle' => 'Spam protection filter',
'spamprotectiontext' => 'The page you wanted to save was blocked by the spam filter. This is probably caused by a link to an external site.',
'spamprotectionmatch' => 'The following text is what triggered our spam filter: $1',
'subcategorycount' => 'There {{PLURAL:$1|is one subcategory|are $1 subcategories}} to this category.',
'categoryarticlecount' => 'There {{PLURAL:$1|is one page|are $1 pages}} in this category.',
'category-media-count' => 'There {{PLURAL:$1|is one file|are $1 files}} in this category.',
'listingcontinuesabbrev' => 'cont.',
'spambot_username' => 'MediaWiki spam cleanup',
'spam_reverting' => 'Reverting to last version not containing links to $1',
'spam_blanking' => 'All revisions contained links to $1, blanking',
'spamprotectiontitle' => 'Spam protection filter',
'spamprotectiontext' => 'The page you wanted to save was blocked by the spam filter. This is probably caused by a link to an external site.',
'spamprotectionmatch' => 'The following text is what triggered our spam filter: $1',
'subcategorycount' => 'There {{PLURAL:$1|is one subcategory|are $1 subcategories}} to this category.',
'category-subcat-count' => '{{PLURAL:$2|This category has only the following subcategory.|This category has the following {{PLURAL:$1|subcategory|$1 subcategories}}, out of $2 total.}}',
'category-subcat-count-limited' => 'This category has the following {{PLURAL:$1|subcategory|$1 subcategories}}.',
'category-article-count' => '{{PLURAL:$2|This category contains only the following page.|The following {{PLURAL:$1|page is|$1 pages are}} in this category, out of $2 total.}}',
'category-article-count-limited' => 'The following {{PLURAL:$1|page is|$1 pages are}} in the current category.',
'category-file-count' => '{{PLURAL:$2|This category contains only the following file.|The following {{PLURAL:$1|file is|$1 files are}} in this category, out of $2 total.}}',
'category-file-count-limited' => 'The following {{PLURAL:$1|file is|$1 files are}} in the current category.',
'listingcontinuesabbrev' => 'cont.',
'spambot_username' => 'MediaWiki spam cleanup',
'spam_reverting' => 'Reverting to last version not containing links to $1',
'spam_blanking' => 'All revisions contained links to $1, blanking',
# Info page
'infosubtitle' => 'Information for page',

View file

@ -0,0 +1,17 @@
-- Track all existing categories, together with cached membership counts.
CREATE TABLE /*$wgDBprefix*/category (
-- Primary key
cat_id int unsigned NOT NULL auto_increment,
-- Name of the category, in the same form as page_title (with underscores).
cat_title varchar(255) binary NOT NULL,
-- The numbers of member pages (including categories and media),
-- subcategories, and Image: namespace members, respectively.  These are
-- signed to make underflow more obvious.
cat_pages int signed NOT NULL default 0,
cat_subcats int signed NOT NULL default 0,
cat_files int signed NOT NULL default 0,
-- Should the category be hidden from article views?
cat_hidden tinyint(1) unsigned NOT NULL default 0,
PRIMARY KEY (cat_id),
UNIQUE KEY (cat_title),
-- For Special:Mostlinkedcategories
KEY (cat_pages)
) /*$wgDBTableOptions*/;

View file

@ -0,0 +1,84 @@
<?php
/**
* @addtogroup Maintenance
* @author Simetrical
*/
define( 'REPORTING_INTERVAL', 1000 );
/**
 * Fill the category table by recounting every category that is named in
 * categorylinks.
 *
 * The distinct cl_to values are walked in ascending order, one query per
 * category, and Category::refreshCounts() is called for each.  When the walk
 * is finished, a 'populate category' row is written to updatelog so that
 * later runs can tell the work has been done.
 *
 * @param $begin string Only process categories whose name sorts after this
 *   value; '' starts from the beginning
 * @param $maxlag int Passed to wfWaitForSlaves() every REPORTING_INTERVAL
 *   categories
 * @param $throttle int Milliseconds to sleep after each category
 * @param $force bool Run even if updatelog says the table is populated
 * @return bool True if the table was already populated or the completion
 *   row was written; false if writing that row failed
 */
function populateCategory( $begin, $maxlag, $throttle, $force ) {
	# Normalize the arguments up front, before any of them is used.
	$maxlag = intval( $maxlag );
	$throttle = intval( $throttle );
	$force = (bool)$force;

	$dbw = wfGetDB( DB_MASTER );

	if( !$force ) {
		$row = $dbw->selectRow(
			'updatelog',
			'1',
			array( 'ul_key' => 'populate category' ),
			__FUNCTION__
		);
		if( $row ) {
			echo "Category table already populated.  Use php ".
			"maintenance/populateCategory.php\n--force from the command line ".
			"to override.\n";
			return true;
		}
	}

	if( $begin !== '' ) {
		$where = 'cl_to > '.$dbw->addQuotes( $begin );
	} else {
		$where = null;
	}

	$i = 0;
	while( true ) {
		# Find which category to update
		$row = $dbw->selectRow(
			'categorylinks',
			'cl_to',
			$where,
			__FUNCTION__,
			array(
				'ORDER BY' => 'cl_to'
			)
		);
		if( !$row ) {
			# Done, hopefully.
			break;
		}
		$name = $row->cl_to;
		# The next iteration continues strictly after this category.
		$where = 'cl_to > '.$dbw->addQuotes( $name );

		# Use the row to update the category count
		$cat = Category::newFromName( $name );
		if( !is_object( $cat ) ) {
			var_dump( $cat );
			throw new MWException( "The category named $name is not valid?!" );
		}
		$cat->refreshCounts();

		++$i;
		if( !($i % REPORTING_INTERVAL) ) {
			# Progress marker; it can be fed back in as $begin to resume.
			echo "$name\n";
			wfWaitForSlaves( $maxlag );
		}
		# $throttle is in milliseconds, usleep() takes microseconds.
		usleep( $throttle*1000 );
	}

	if( $dbw->insert(
			'updatelog',
			array( 'ul_key' => 'populate category' ),
			__FUNCTION__,
			'IGNORE'
		)
	) {
		echo "Category population complete.\n";
		return true;
	} else {
		echo "Could not insert category population row.\n";
		return false;
	}
}

View file

@ -0,0 +1,51 @@
<?php
/**
* @addtogroup Maintenance
* @author Simetrical
*/
# Options that take a value on the command line.
$optionsWithArgs = array( 'begin', 'max-slave-lag', 'throttle' );
require_once "commandLine.inc";
require_once "populateCategory.inc";

if( isset( $options['help'] ) ) {
	echo <<<TEXT
This script will populate the category table, added in MediaWiki 1.13. It will
print out progress indicators every 1000 categories it adds to the table. The
script is perfectly safe to run on large, live wikis, and running it multiple
times is harmless. You may want to use the throttling options if it's causing
too much load; they will not affect correctness.
If the script is stopped and later resumed, you can use the --begin option with
the last printed progress indicator to pick up where you left off. This is
safe, because any newly-added categories before this cutoff will have been
added after the software update and so will be populated anyway.
When the script has finished, it will make a note of this in the database, and
will not run again without the --force option.
Usage:
php populateCategory.php [--max-slave-lag <seconds>] [--begin <name>]
[--throttle <milliseconds>] [--force]
--begin: Only do categories whose names are alphabetically after the pro-
vided name. Default: empty (start from beginning).
--max-slave-lag: If slave lag exceeds this many seconds, wait until it
drops before continuing. Default: 10.
--throttle: Wait this many milliseconds after each category. Default: 0.
--force: Run regardless of whether the database says it's been run already.
TEXT;
	exit( 0 );
}

# Defaults for anything not given on the command line.  The keys must be the
# option names themselves, otherwise a user-supplied value is never read.
$defaults = array(
	'begin' => '',
	'max-slave-lag' => 10,
	'throttle' => 0,
	'force' => false
);
$options = array_merge( $defaults, $options );

populateCategory( $options['begin'], $options['max-slave-lag'],
	$options['throttle'], $options['force'] );

View file

@ -486,6 +486,39 @@ CREATE TABLE /*$wgDBprefix*/categorylinks (
) /*$wgDBTableOptions*/;
--
-- Track all existing categories.  Something is a category if 1) it has an
-- entry somewhere in categorylinks, or 2) it once did.  Categories might not
-- have corresponding pages, so they need to be tracked separately.
--
CREATE TABLE /*$wgDBprefix*/category (
-- Primary key
cat_id int unsigned NOT NULL auto_increment,
-- Name of the category, in the same form as page_title (with underscores).
-- If there is a category page corresponding to this category, by definition,
-- it has this name (in the Category namespace).
cat_title varchar(255) binary NOT NULL,
-- The numbers of member pages (including categories and media),
-- subcategories, and Image: namespace members, respectively.  These are
-- signed to make underflow more obvious.  We make the first number include
-- the second two for better sorting: subtracting for display is easy, adding
-- for ordering is not.
cat_pages int signed NOT NULL default 0,
cat_subcats int signed NOT NULL default 0,
cat_files int signed NOT NULL default 0,
-- Should the category be hidden from article views?
cat_hidden tinyint(1) unsigned NOT NULL default 0,
PRIMARY KEY (cat_id),
UNIQUE KEY (cat_title),
-- For Special:Mostlinkedcategories
KEY (cat_pages)
) /*$wgDBTableOptions*/;
--
-- Track links to external URLs
--

View file

@ -133,6 +133,8 @@ $wgMysqlUpdates = array(
array( 'add_field', 'ipblocks', 'ipb_by_text', 'patch-ipb_by_text.sql' ),
array( 'add_table', 'page_props', 'patch-page_props.sql' ),
array( 'add_table', 'updatelog', 'patch-updatelog.sql' ),
array( 'add_table', 'category', 'patch-category.sql' ),
array( 'do_category_population' ),
);
@ -1135,6 +1137,20 @@ function do_restrictions_update() {
}
}
/**
 * Updater step: populate the category table unless the update log already
 * records that this has been done.  Prints progress information as it goes.
 */
function do_category_population() {
	$alreadyPopulated = update_row_exists( 'populate category' );
	if( $alreadyPopulated ) {
		echo "...category table already populated.\n";
		return;
	}

	require_once( 'populateCategory.inc' );

	$notice = "Populating category table, printing progress markers.  ".
		"For large databases, you\n".
		"may want to hit Ctrl-C and do this manually with maintenance/\n".
		"populateCategory.php.\n";
	echo $notice;

	# Arguments: start from the beginning, max lag 10, no throttling, force.
	populateCategory( '', 10, 0, true );

	echo "Done populating category table.\n";
}
function
pg_describe_table($table)
{