2006-01-09 14:20:26 +00:00
|
|
|
<?php
|
2012-05-21 19:56:04 +00:00
|
|
|
/**
|
|
|
|
|
* Recent changes filtering by category.
|
|
|
|
|
*
|
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
* (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
|
|
|
*
|
|
|
|
|
* @file
|
|
|
|
|
*/
|
|
|
|
|
|
2017-02-10 18:09:05 +00:00
|
|
|
use Wikimedia\Rdbms\IDatabase;
|
|
|
|
|
|
2007-04-04 05:22:37 +00:00
|
|
|
/**
 * The "CategoryFinder" class takes a list of articles, creates an internal
 * representation of all their parent categories (as well as parents of
 * parents etc.). From this representation, it determines which of these
 * articles are in one or all of a given subset of categories.
 *
 * Example use :
 * @code
 *     # Determines whether the article with the page_id 12345 is in both
 *     # "Category 1" and "Category 2" or their subcategories, respectively
 *
 *     $cf = new CategoryFinder;
 *     $cf->seed(
 *         [ 12345 ],
 *         [ 'Category 1', 'Category 2' ],
 *         'AND'
 *     );
 *     $a = $cf->run();
 *     print implode( ',' , $a );
 * @endcode
 */
class CategoryFinder {
	/** @var int[] The original article IDs passed to the seed function */
	protected $articles = [];

	/**
	 * @var array Category names in DBKEY form that don't have a page,
	 *  stored as [ DBKEY => DBKEY ] so membership is a key lookup
	 */
	protected $deadend = [];

	/**
	 * @var array Parent tree: [ page ID => [ category DBKEY => categorylinks row ] ],
	 *  where each row object carries the selected cl_to and cl_from fields
	 */
	protected $parents = [];

	/** @var array Array of article/category IDs still to be scanned for parents */
	protected $next = [];

	/** @var array Array of DBKEY category names, stored as [ DBKEY => DBKEY ] */
	protected $targets = [];

	/** @var array Map of category DBKEY => page ID of the category page */
	protected $name2id = [];

	/** @var string "AND" or "OR" */
	protected $mode;

	/** @var IDatabase Read-DB replica DB */
	protected $dbr;

	/**
	 * Initializes the instance. Do this prior to calling run().
	 *
	 * @param array $articleIds Array of article IDs
	 * @param array $categories Array of category names (without namespace). Each
	 *  one is converted to DBKEY form through Title::makeTitleSafe(); names for
	 *  which no title can be made are silently skipped.
	 * @param string $mode 'AND' to require every target category, 'OR' to accept
	 *  any single one. Any value other than 'OR' is handled as 'AND'. Default 'AND'.
	 */
	public function seed( $articleIds, $categories, $mode = 'AND' ) {
		$this->articles = $articleIds;
		$this->next = $articleIds;
		$this->mode = $mode;

		# Set the list of target categories; convert them to DBKEY form first
		$this->targets = [];
		foreach ( $categories as $c ) {
			$ct = Title::makeTitleSafe( NS_CATEGORY, $c );
			if ( $ct ) {
				$c = $ct->getDBkey();
				$this->targets[$c] = $c;
			}
		}
	}

	/**
	 * Iterates through the parent tree starting with the seed values,
	 * then checks the articles if they match the conditions
	 *
	 * @return array Array of page_ids (those given to seed() that match the conditions)
	 */
	public function run() {
		$this->dbr = wfGetDB( DB_REPLICA );

		# Build the parent tree one layer at a time until no new categories turn up
		while ( count( $this->next ) > 0 ) {
			$this->scanNextLayer();
		}

		# Now check if this applies to the individual articles
		$ret = [];

		foreach ( $this->articles as $article ) {
			# check() consumes its condition list by reference, so every
			# article has to start from a fresh copy of the targets
			$conds = $this->targets;
			if ( $this->check( $article, $conds ) ) {
				# Matches the conditions
				$ret[] = $article;
			}
		}

		return $ret;
	}

	/**
	 * Get the parents. Only really useful if run() has been called already
	 *
	 * @return array [ page ID => [ category DBKEY => categorylinks row ] ]
	 */
	public function getParents() {
		return $this->parents;
	}

	/**
	 * This functions recurses through the parent representation, trying to match the conditions
	 *
	 * @param int $id The article/category to check
	 * @param array &$conds The categories still to be matched, keyed by DBKEY.
	 *  Matched entries are removed (in 'OR' mode the whole array is emptied on
	 *  the first match), so the caller sees what is left over.
	 * @param array $path IDs already visited on the way down; used to check for recursion loops
	 * @return bool Does this match the conditions?
	 */
	private function check( $id, &$conds, $path = [] ) {
		// Check for loops and stop!
		// The comparison is deliberately loose: IDs handed to seed() may be ints,
		// while IDs read from query results may be numeric strings.
		if ( in_array( $id, $path ) ) {
			return false;
		}

		$path[] = $id;

		# Shortcut (runtime paranoia): No conditions=all matched
		if ( count( $conds ) === 0 ) {
			return true;
		}

		if ( !isset( $this->parents[$id] ) ) {
			return false;
		}

		# iterate through the parents
		foreach ( $this->parents[$id] as $p ) {
			$pname = $p->cl_to;

			# Is this a condition?
			if ( isset( $conds[$pname] ) ) {
				# This key is in the category list!
				if ( $this->mode === 'OR' ) {
					# One found, that's enough!
					$conds = [];
					return true;
				} else {
					# Assuming "AND" as default
					unset( $conds[$pname] );
					if ( count( $conds ) === 0 ) {
						# All conditions met, done
						return true;
					}
				}
			}

			# Not done yet, try sub-parents
			if ( !isset( $this->name2id[$pname] ) ) {
				# No sub-parent
				continue;
			}
			$done = $this->check( $this->name2id[$pname], $conds, $path );
			if ( $done || count( $conds ) === 0 ) {
				# Subparents have done it!
				return true;
			}
		}

		return false;
	}

	/**
	 * Scans a "parent layer" of the articles/categories in $this->next
	 *
	 * Records the parents of every ID in $this->next in $this->parents, then
	 * replaces $this->next with the page IDs of the newly seen categories that
	 * have a category page. New categories without a page are remembered in
	 * $this->deadend so they are not looked up again.
	 */
	private function scanNextLayer() {
		# Find all parents of the article currently in $this->next
		$layer = [];
		$res = $this->dbr->select(
			/* FROM   */ 'categorylinks',
			/* SELECT */ [ 'cl_to', 'cl_from' ],
			/* WHERE  */ [ 'cl_from' => $this->next ],
			__METHOD__ . '-1'
		);
		foreach ( $res as $o ) {
			$k = $o->cl_to;

			# Update parent tree
			if ( !isset( $this->parents[$o->cl_from] ) ) {
				$this->parents[$o->cl_from] = [];
			}
			$this->parents[$o->cl_from][$k] = $o;

			# Ignore those we already have.
			# $this->deadend is keyed by name, so test the key: a loose in_array()
			# would treat distinct numeric-looking names (e.g. '1e3' and '1000')
			# as the same category, and would scan the whole list on every row.
			if ( isset( $this->deadend[$k] ) ) {
				continue;
			}

			if ( isset( $this->name2id[$k] ) ) {
				continue;
			}

			# Hey, new category!
			$layer[$k] = $k;
		}

		$this->next = [];

		# Find the IDs of all category pages in $layer, if they exist
		if ( count( $layer ) > 0 ) {
			$res = $this->dbr->select(
				/* FROM   */ 'page',
				/* SELECT */ [ 'page_id', 'page_title' ],
				/* WHERE  */ [ 'page_namespace' => NS_CATEGORY, 'page_title' => $layer ],
				__METHOD__ . '-2'
			);
			foreach ( $res as $o ) {
				$id = $o->page_id;
				$name = $o->page_title;
				$this->name2id[$name] = $id;
				$this->next[] = $id;
				# Whatever is left in $layer afterwards has no category page
				unset( $layer[$name] );
			}
		}

		# Mark dead ends
		foreach ( $layer as $v ) {
			$this->deadend[$v] = $v;
		}
	}
}
|