Categoryfinder::seed() was passing category names through Title::newFromText($ct,NS_CATEGORY), which meant that if the name started with what looks like a namespace, that would override the NS_CATEGORY default. Thus you got 'Wikipedia:Books' -> text portion 'Books' when you wanted 'Category:Wikipedia:Books' -> text portion 'Wikipedia:Books'. Using Title::makeTitleSafe(NS_CATEGORY,$ct) instead forces it to always use the category namespace. Additionally, I've added a check for invalid input -- previously a bad input leading to an invalid title would just kill it dead with a fatal error. Invalid input in the category list is now skipped.
198 lines
5.3 KiB
PHP
198 lines
5.3 KiB
PHP
<?php
|
|
|
|
/**
 * The "Categoryfinder" class takes a list of articles, creates an internal
 * representation of all their parent categories (as well as parents of
 * parents etc.). From this representation, it determines which of these
 * articles are in one or all of a given subset of categories.
 *
 * Example use :
 * <code>
 * 	# Determines whether the article with the page_id 12345 is in both
 * 	# "Category 1" and "Category 2" or their subcategories, respectively
 *
 * 	$cf = new Categoryfinder;
 * 	$cf->seed(
 * 		array( 12345 ),
 * 		array( "Category 1", "Category 2" ),
 * 		"AND"
 * 	);
 * 	$a = $cf->run();
 * 	print implode( ",", $a );
 * </code>
 */
class Categoryfinder {

	/** The original article IDs passed to seed() */
	public $articles = array();

	/** DBKEY names of categories that have no category page, keyed by that same name */
	public $deadend = array();

	/** Parent tree: [page ID => array( category DBKEY => categorylinks row object )] */
	public $parents = array();

	/** Article/category page IDs whose parents still have to be looked up */
	public $next = array();

	/** Target categories in DBKEY form, keyed by that same name */
	public $targets = array();

	/** Map of category DBKEY name => page ID of the category page */
	public $name2id = array();

	/** "AND" or "OR"; anything other than "OR" is treated as "AND" by check() */
	public $mode;

	/** Database connection used for reading; set by run() */
	public $dbr;

	/**
	 * Constructor (currently empty).
	 */
	public function __construct() {
	}

	/**
	 * Initializes the instance. Do this prior to calling run().
	 *
	 * @param $article_ids Array of article IDs
	 * @param $categories Array of category names (without namespace prefix).
	 *   Each name is normalised to DBKEY form; names that do not yield a
	 *   valid title are silently skipped.
	 * @param $mode String: "AND" to require all target categories, "OR" to
	 *   require at least one of them; default 'AND'.
	 */
	public function seed( $article_ids, $categories, $mode = "AND" ) {
		$this->articles = $article_ids;
		$this->next = $article_ids;
		$this->mode = $mode;

		# Set the list of target categories; convert them to DBKEY form first
		$this->targets = array();
		foreach ( $categories as $category ) {
			# makeTitleSafe() pins the namespace to NS_CATEGORY, so a name
			# such as "Wikipedia:Books" keeps its apparent prefix
			$title = Title::makeTitleSafe( NS_CATEGORY, $category );
			if ( !$title ) {
				# Invalid input, skip it
				continue;
			}
			$dbKey = $title->getDBkey();
			$this->targets[$dbKey] = $dbKey;
		}
	}

	/**
	 * Iterates through the parent tree starting with the seed values,
	 * then checks the articles if they match the conditions
	 *
	 * @return array of page_ids (those given to seed() that match the conditions)
	 */
	public function run() {
		$this->dbr = wfGetDB( DB_SLAVE );

		# Climb the category tree one layer at a time until nothing new turns up
		while ( count( $this->next ) > 0 ) {
			$this->scan_next_layer();
		}

		# Now check if this applies to the individual articles
		$ret = array();
		foreach ( $this->articles as $article ) {
			# check() consumes its condition list, so hand it a fresh copy
			$conds = $this->targets;
			if ( $this->check( $article, $conds ) ) {
				# Matches the conditions
				$ret[] = $article;
			}
		}
		return $ret;
	}

	/**
	 * Recurses through the parent representation, trying to match the conditions
	 *
	 * @param $id The article/category to check
	 * @param $conds The array of categories to match (keyed by DBKEY name).
	 *   Passed by reference: matched entries are removed from it, and in
	 *   "OR" mode it is emptied on the first match.
	 * @param $path Array of IDs already visited on the way down, used to
	 *   check for recursion loops
	 * @return bool Does this match the conditions?
	 */
	public function check( $id, &$conds, $path = array() ) {
		// Check for loops and stop!
		// NOTE(review): loose comparison is kept on purpose here; IDs may be
		// ints or numeric strings depending on where they came from.
		if ( in_array( $id, $path ) ) {
			return false;
		}
		$path[] = $id;

		# Shortcut (runtime paranoia): No conditions=all matched
		if ( count( $conds ) == 0 ) {
			return true;
		}

		if ( !isset( $this->parents[$id] ) ) {
			return false;
		}

		# Iterate through the parents
		foreach ( $this->parents[$id] as $parent ) {
			$pname = $parent->cl_to;

			# Is this a condition?
			if ( isset( $conds[$pname] ) ) {
				# This key is in the category list!
				if ( $this->mode == "OR" ) {
					# One found, that's enough!
					$conds = array();
					return true;
				}

				# Assuming "AND" as default
				unset( $conds[$pname] );
				if ( count( $conds ) == 0 ) {
					# All conditions met, done
					return true;
				}
			}

			# Not done yet, try sub-parents
			if ( !isset( $this->name2id[$pname] ) ) {
				# No sub-parent
				continue;
			}
			$done = $this->check( $this->name2id[$pname], $conds, $path );
			if ( $done || count( $conds ) == 0 ) {
				# Subparents have done it!
				return true;
			}
		}
		return false;
	}

	/**
	 * Scans a "parent layer" of the articles/categories in $this->next.
	 *
	 * Records the categorylinks rows of every ID in $this->next in
	 * $this->parents, then replaces $this->next with the page IDs of the
	 * newly discovered categories that have a category page. Newly
	 * discovered categories without a page are recorded in $this->deadend.
	 * Expects $this->dbr to be set (run() does this).
	 */
	public function scan_next_layer() {
		# Nothing to look up; also avoids building a query with an empty
		# cl_from list
		if ( count( $this->next ) == 0 ) {
			return;
		}

		# Find all parents of the article currently in $this->next
		$layer = array();
		$res = $this->dbr->select(
			/* FROM   */ 'categorylinks',
			/* SELECT */ '*',
			/* WHERE  */ array( 'cl_from' => $this->next ),
			__METHOD__ . "-1"
		);
		while ( $row = $this->dbr->fetchObject( $res ) ) {
			$category = $row->cl_to;

			# Update parent tree
			if ( !isset( $this->parents[$row->cl_from] ) ) {
				$this->parents[$row->cl_from] = array();
			}
			$this->parents[$row->cl_from][$category] = $row;

			# Ignore those we already have. $this->deadend is keyed by name,
			# so test the key: a loose in_array() would treat numeric-looking
			# names such as "1e3" and "1000" as the same category.
			if ( isset( $this->deadend[$category] ) || isset( $this->name2id[$category] ) ) {
				continue;
			}

			# Hey, new category!
			$layer[$category] = $category;
		}
		$this->dbr->freeResult( $res );

		$this->next = array();

		# Find the IDs of all category pages in $layer, if they exist
		if ( count( $layer ) > 0 ) {
			$res = $this->dbr->select(
				/* FROM   */ 'page',
				/* SELECT */ 'page_id,page_title',
				/* WHERE  */ array( 'page_namespace' => NS_CATEGORY, 'page_title' => $layer ),
				__METHOD__ . "-2"
			);
			while ( $row = $this->dbr->fetchObject( $res ) ) {
				$id = $row->page_id;
				$name = $row->page_title;
				$this->name2id[$name] = $id;
				$this->next[] = $id;
				# Found a page for it, so it is not a dead end
				unset( $layer[$name] );
			}
			$this->dbr->freeResult( $res );
		}

		# Mark dead ends: whatever is left in $layer has no category page
		foreach ( $layer as $name ) {
			$this->deadend[$name] = $name;
		}
	}

} # END OF CLASS "Categoryfinder"
|