wiki.techinc.nl/includes/parser/LinkHolderArray.php
Tim Starling 350b498b9f * Revert back to my parser pseudo-branch again. Note: if you feel like reverting this, reverting Parser.php alone to r40010 will work just fine.
* Merged replaceFreeExternalLinks() with doMagicLinks(). Makes a lot of sense, very similar operations, doesn't break any parser tests. Stops free links from interacting with other parser stages, the same way ISBN links don't. 
* The pass order change fixes Brion's complaint in r39980. Early link expansion, triggered by having more than 1000 links in the page, was outputting URLs which were destroyed by RFEL. Added parser test.
* Fixed an unrelated bug in LinkHolderArray::replace(): if a link to a redirect appears in two separate RLH calls, the second and subsequent calls do not add the mw-redirect class. Caused by an unmigrated LinkCache fetch.
* Added a parser test for a pass interaction bug that the pass order change fixes.
* The fuzzer told me to tell you that free external links in non-caption image parameters, which are and have always been invisible, are now not registered either.
* Miscellaneous supporting updates to the test infrastructure.
2008-08-26 14:37:15 +00:00

420 lines
12 KiB
PHP

<?php
class LinkHolderArray {
var $internals = array(), $interwikis = array();
var $size = 0;
var $parent;
function __construct( $parent ) {
$this->parent = $parent;
}
/**
* Reduce memory usage to reduce the impact of circular references
*/
function __destruct() {
foreach ( $this as $name => $value ) {
unset( $this->$name );
}
}
/**
* Merge another LinkHolderArray into this one
*/
function merge( $other ) {
foreach ( $other->internals as $ns => $entries ) {
$this->size += count( $entries );
if ( !isset( $this->internals[$ns] ) ) {
$this->internals[$ns] = $entries;
} else {
$this->internals[$ns] += $entries;
}
}
$this->interwikis += $other->interwikis;
}
/**
* Returns true if the memory requirements of this object are getting large
*/
function isBig() {
global $wgLinkHolderBatchSize;
return $this->size > $wgLinkHolderBatchSize;
}
/**
* Clear all stored link holders.
* Make sure you don't have any text left using these link holders, before you call this
*/
function clear() {
$this->internals = array();
$this->interwikis = array();
$this->size = 0;
}
/**
* Make a link placeholder. The text returned can be later resolved to a real link with
* replaceLinkHolders(). This is done for two reasons: firstly to avoid further
* parsing of interwiki links, and secondly to allow all existence checks and
* article length checks (for stub links) to be bundled into a single query.
*
*/
function makeHolder( $nt, $text = '', $query = '', $trail = '', $prefix = '' ) {
wfProfileIn( __METHOD__ );
if ( ! is_object($nt) ) {
# Fail gracefully
$retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}";
} else {
# Separate the link trail from the rest of the link
list( $inside, $trail ) = Linker::splitTrail( $trail );
$entry = array(
'title' => $nt,
'text' => $prefix.$text.$inside,
'pdbk' => $nt->getPrefixedDBkey(),
);
if ( $query !== '' ) {
$entry['query'] = $query;
}
if ( $nt->isExternal() ) {
// Use a globally unique ID to keep the objects mergable
$key = $this->parent->nextLinkID();
$this->interwikis[$key] = $entry;
$retVal = "<!--IWLINK $key-->{$trail}";
} else {
$key = $this->parent->nextLinkID();
$ns = $nt->getNamespace();
$this->internals[$ns][$key] = $entry;
$retVal = "<!--LINK $ns:$key-->{$trail}";
}
$this->size++;
}
wfProfileOut( __METHOD__ );
return $retVal;
}
/**
* Replace <!--LINK--> link placeholders with actual links, in the buffer
* Placeholders created in Skin::makeLinkObj()
* Returns an array of link CSS classes, indexed by PDBK.
*/
function replace( &$text ) {
wfProfileIn( __METHOD__ );
$colours = $this->replaceInternal( $text );
$this->replaceInterwiki( $text );
wfProfileOut( __METHOD__ );
return $colours;
}
/**
* Replace internal links
*/
protected function replaceInternal( &$text ) {
if ( !$this->internals ) {
return;
}
wfProfileIn( __METHOD__ );
global $wgUser, $wgContLang;
$pdbks = array();
$colours = array();
$linkcolour_ids = array();
$sk = $this->parent->getOptions()->getSkin();
$linkCache = LinkCache::singleton();
$output = $this->parent->getOutput();
wfProfileIn( __METHOD__.'-check' );
$dbr = wfGetDB( DB_SLAVE );
$page = $dbr->tableName( 'page' );
$threshold = $wgUser->getOption('stubthreshold');
# Sort by namespace
ksort( $this->internals );
# Generate query
$query = false;
$current = null;
foreach ( $this->internals as $ns => $entries ) {
foreach ( $entries as $index => $entry ) {
$key = "$ns:$index";
$title = $entry['title'];
$pdbk = $entry['pdbk'];
# Skip invalid entries.
# Result will be ugly, but prevents crash.
if ( is_null( $title ) ) {
continue;
}
# Check if it's a static known link, e.g. interwiki
if ( $title->isAlwaysKnown() ) {
$colours[$pdbk] = '';
} elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) {
$colours[$pdbk] = $sk->getLinkColour( $title, $threshold );
$output->addLink( $title, $id );
} elseif ( $linkCache->isBadLink( $pdbk ) ) {
$colours[$pdbk] = 'new';
} elseif ( $title->getNamespace() == NS_SPECIAL && !SpecialPage::exists( $pdbk ) ) {
$colours[$pdbk] = 'new';
} else {
# Not in the link cache, add it to the query
if ( !isset( $current ) ) {
$current = $ns;
$query = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len";
$query .= " FROM $page WHERE (page_namespace=$ns AND page_title IN(";
} elseif ( $current != $ns ) {
$current = $ns;
$query .= ")) OR (page_namespace=$ns AND page_title IN(";
} else {
$query .= ', ';
}
$query .= $dbr->addQuotes( $title->getDBkey() );
}
}
}
if ( $query ) {
$query .= '))';
$res = $dbr->query( $query, __METHOD__ );
# Fetch data and form into an associative array
# non-existent = broken
while ( $s = $dbr->fetchObject($res) ) {
$title = Title::makeTitle( $s->page_namespace, $s->page_title );
$pdbk = $title->getPrefixedDBkey();
$linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect );
$output->addLink( $title, $s->page_id );
# FIXME: convoluted data flow
# The redirect status and length is passed to getLinkColour via the LinkCache
# Use formal parameters instead
$colours[$pdbk] = $sk->getLinkColour( $title, $threshold );
//add id to the extension todolist
$linkcolour_ids[$s->page_id] = $pdbk;
}
unset( $res );
//pass an array of page_ids to an extension
wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
}
wfProfileOut( __METHOD__.'-check' );
# Do a second query for different language variants of links and categories
if($wgContLang->hasVariants()){
$linkBatch = new LinkBatch();
$variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders)
$categoryMap = array(); // maps $category_variant => $category (dbkeys)
$varCategories = array(); // category replacements oldDBkey => newDBkey
$categories = $output->getCategoryLinks();
// Add variants of links to link batch
foreach ( $this->internals as $ns => $entries ) {
foreach ( $entries as $index => $entry ) {
$key = "$ns:$index";
$pdbk = $entry['pdbk'];
$title = $entry['title'];
$titleText = $title->getText();
// generate all variants of the link title text
$allTextVariants = $wgContLang->convertLinkToAllVariants($titleText);
// if link was not found (in first query), add all variants to query
if ( !isset($colours[$pdbk]) ){
foreach($allTextVariants as $textVariant){
if($textVariant != $titleText){
$variantTitle = Title::makeTitle( $ns, $textVariant );
if(is_null($variantTitle)) continue;
$linkBatch->addObj( $variantTitle );
$variantMap[$variantTitle->getPrefixedDBkey()][] = $key;
}
}
}
}
}
// process categories, check if a category exists in some variant
foreach( $categories as $category ){
$variants = $wgContLang->convertLinkToAllVariants($category);
foreach($variants as $variant){
if($variant != $category){
$variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) );
if(is_null($variantTitle)) continue;
$linkBatch->addObj( $variantTitle );
$categoryMap[$variant] = $category;
}
}
}
if(!$linkBatch->isEmpty()){
// construct query
$titleClause = $linkBatch->constructSet('page', $dbr);
$variantQuery = "SELECT page_id, page_namespace, page_title, page_is_redirect, page_len";
$variantQuery .= " FROM $page WHERE $titleClause";
$varRes = $dbr->query( $variantQuery, __METHOD__ );
// for each found variants, figure out link holders and replace
while ( $s = $dbr->fetchObject($varRes) ) {
$variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
$varPdbk = $variantTitle->getPrefixedDBkey();
$vardbk = $variantTitle->getDBkey();
$holderKeys = array();
if(isset($variantMap[$varPdbk])){
$holderKeys = $variantMap[$varPdbk];
$linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect );
$output->addLink( $variantTitle, $s->page_id );
}
// loop over link holders
foreach($holderKeys as $key){
list( $ns, $index ) = explode( ':', $key, 2 );
$entry =& $this->internals[$ns][$index];
$pdbk = $entry['pdbk'];
if(!isset($colours[$pdbk])){
// found link in some of the variants, replace the link holder data
$entry['title'] = $variantTitle;
$entry['pdbk'] = $varPdbk;
// set pdbk and colour
# FIXME: convoluted data flow
# The redirect status and length is passed to getLinkColour via the LinkCache
# Use formal parameters instead
$colours[$varPdbk] = $sk->getLinkColour( $variantTitle, $threshold );
$linkcolour_ids[$s->page_id] = $pdbk;
}
wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
}
// check if the object is a variant of a category
if(isset($categoryMap[$vardbk])){
$oldkey = $categoryMap[$vardbk];
if($oldkey != $vardbk)
$varCategories[$oldkey]=$vardbk;
}
}
// rebuild the categories in original order (if there are replacements)
if(count($varCategories)>0){
$newCats = array();
$originalCats = $output->getCategories();
foreach($originalCats as $cat => $sortkey){
// make the replacement
if( array_key_exists($cat,$varCategories) )
$newCats[$varCategories[$cat]] = $sortkey;
else $newCats[$cat] = $sortkey;
}
$this->parent->mOutput->setCategoryLinks($newCats);
}
}
}
# Construct search and replace arrays
wfProfileIn( __METHOD__.'-construct' );
$replacePairs = array();
foreach ( $this->internals as $ns => $entries ) {
foreach ( $entries as $index => $entry ) {
$pdbk = $entry['pdbk'];
$title = $entry['title'];
$query = isset( $entry['query'] ) ? $entry['query'] : '';
$key = "$ns:$index";
$searchkey = "<!--LINK $key-->";
if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] == 'new' ) {
$linkCache->addBadLinkObj( $title );
$colours[$pdbk] = 'new';
$output->addLink( $title, 0 );
$replacePairs[$searchkey] = $sk->makeBrokenLinkObj( $title,
$entry['text'],
$query );
} else {
$replacePairs[$searchkey] = $sk->makeColouredLinkObj( $title, $colours[$pdbk],
$entry['text'],
$query );
}
}
}
$replacer = new HashtableReplacer( $replacePairs, 1 );
wfProfileOut( __METHOD__.'-construct' );
# Do the thing
wfProfileIn( __METHOD__.'-replace' );
$text = preg_replace_callback(
'/(<!--LINK .*?-->)/',
$replacer->cb(),
$text);
wfProfileOut( __METHOD__.'-replace' );
wfProfileOut( __METHOD__ );
}
/**
* Replace interwiki links
*/
protected function replaceInterwiki( &$text ) {
if ( empty( $this->interwikis ) ) {
return;
}
wfProfileIn( __METHOD__ );
# Make interwiki link HTML
$sk = $this->parent->getOptions()->getSkin();
$replacePairs = array();
foreach( $this->interwikis as $key => $link ) {
$replacePairs[$key] = $sk->link( $link['title'], $link['text'] );
}
$replacer = new HashtableReplacer( $replacePairs, 1 );
$text = preg_replace_callback(
'/<!--IWLINK (.*?)-->/',
$replacer->cb(),
$text );
wfProfileOut( __METHOD__ );
}
/**
* Replace <!--LINK--> link placeholders with plain text of links
* (not HTML-formatted).
* @param string $text
* @return string
*/
function replaceText( $text ) {
wfProfileIn( __METHOD__ );
$text = preg_replace_callback(
'/<!--(LINK|IWLINK) (.*?)-->/',
array( &$this, 'replaceTextCallback' ),
$text );
wfProfileOut( __METHOD__ );
return $text;
}
/**
* @param array $matches
* @return string
* @private
*/
function replaceTextCallback( $matches ) {
$type = $matches[1];
$key = $matches[2];
if( $type == 'LINK' ) {
list( $ns, $index ) = explode( ':', $key, 2 );
if( isset( $this->internals[$ns][$index]['text'] ) ) {
return $this->internals[$ns][$index]['text'];
}
} elseif( $type == 'IWLINK' ) {
if( isset( $this->interwikis[$key]['text'] ) ) {
return $this->interwikis[$key]['text'];
}
}
return $matches[0];
}
}