2008-08-15 11:46:46 +00:00
<?php
/* Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 * or see http://www.gnu.org/
 */
2008-08-18 00:16:32 +00:00
/**
* The HTML differ depends on WikiDiff3
*/
// $IP is the installation root; the path must not contain stray spaces,
// otherwise the include cannot be resolved.
global $IP;
require_once( "$IP/includes/Diff.php" );
2008-08-15 11:46:46 +00:00
/**
* Any element in the DOM tree of an HTML document .
*/
2008-08-18 00:16:32 +00:00
class Node {

	/**
	 * Direct parent of this node, or null when the node has none.
	 */
	public $parent;

	/**
	 * Cached list of ancestors, outermost first and the direct parent last.
	 * Null until getParentTree() builds it, and reset by setParent().
	 */
	protected $parentTree;

	/** Whitespace flag for the position just before this node. */
	public $whiteBefore = false;

	/** Whitespace flag for the position just after this node. */
	public $whiteAfter = false;

	/**
	 * @param $parent The parent node, or null.
	 */
	function __construct( $parent ) {
		$this->parent = $parent;
	}

	/**
	 * Returns the chain of ancestors of this node.
	 *
	 * The list is computed once and cached; it is only invalidated when
	 * setParent() is called on this very node.
	 *
	 * @return array Ancestors, outermost first, direct parent last
	 *               (empty when the node has no parent).
	 */
	public function getParentTree() {
		if ( !isset( $this->parentTree ) ) {
			if ( !is_null( $this->parent ) ) {
				$this->parentTree = $this->parent->getParentTree();
				$this->parentTree[] = $this->parent;
			} else {
				$this->parentTree = array();
			}
		}
		return $this->parentTree;
	}

	/**
	 * Finds the deepest ancestor position at which this node and $other
	 * still share the same opening tag.
	 *
	 * Ancestors are compared by their openingTag string, starting at index 1;
	 * index 0 is taken to be common to both chains (presumably the BODY
	 * root - confirm against callers).
	 *
	 * @param Node $other Node to compare ancestor chains with.
	 * @return LastCommonParentResult
	 */
	public function getLastCommonParent( Node $other ) {
		$result = new LastCommonParentResult();

		$myParents = $this->getParentTree();
		$otherParents = $other->getParentTree();

		$i = 1;
		$isSame = true;
		$nbMyParents = sizeof( $myParents );
		$nbOtherParents = sizeof( $otherParents );
		while ( $isSame && $i < $nbMyParents && $i < $nbOtherParents ) {
			// "!$a === $b" would negate $a first and never detect a
			// difference, so the comparison has to be a real "!==".
			if ( $myParents[$i]->openingTag !== $otherParents[$i]->openingTag ) {
				$isSame = false;
			} else {
				// After the while, the index i-1 must be the last common parent
				$i++;
			}
		}

		$result->lastCommonParentDepth = $i - 1;
		$result->parent = $myParents[$i - 1];

		if ( !$isSame ) {
			// The chains diverge below the common parent
			$result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf( $myParents[$i] );
			$result->splittingNeeded = true;
		} else if ( $nbMyParents < $nbOtherParents ) {
			$result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf( $this );
		} else if ( $nbMyParents > $nbOtherParents ) {
			// All tags matched but there are tags left in this tree
			$result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf( $myParents[$i] );
			$result->splittingNeeded = true;
		} else {
			// All tags matched until the very last one in both trees
			// or there were no tags besides the BODY
			$result->indexInLastCommonParent = $myParents[$i - 1]->getIndexOf( $this );
		}
		return $result;
	}

	/**
	 * Re-parents this node and drops the cached ancestor list.
	 *
	 * @param $parent The new parent, or null.
	 */
	public function setParent( $parent ) {
		$this->parent = $parent;
		// isset() treats null as "not built yet", so the next
		// getParentTree() call rebuilds the list.
		$this->parentTree = null;
	}

	/**
	 * @return bool True when any ancestor reports isPre().
	 */
	public function inPre() {
		foreach ( $this->getParentTree() as $ancestor ) {
			if ( $ancestor->isPre() ) {
				return true;
			}
		}
		return false;
	}
}
/**
* Node that can contain other nodes . Represents an HTML tag .
*/
class TagNode extends Node {

	/** Child nodes, in document order. */
	public $children = array();

	/** Lower-cased tag name. */
	public $qName;

	/** Attributes of the tag, keyed by lower-cased attribute name. */
	public $attributes = array();

	/** Textual form of the opening tag; used to compare tags for equality. */
	public $openingTag;

	/**
	 * Tag names that DomTreeBuilder treats as blocks when deciding on
	 * whitespace flags and sibling tracking.
	 */
	public static $blocks = array(
		'html' => TRUE, 'body' => TRUE, 'p' => TRUE, 'blockquote' => TRUE,
		'h1' => TRUE, 'h2' => TRUE, 'h3' => TRUE, 'h4' => TRUE, 'h5' => TRUE,
		'pre' => TRUE, 'div' => TRUE, 'ul' => TRUE, 'ol' => TRUE, 'li' => TRUE,
		'table' => TRUE, 'tbody' => TRUE, 'tr' => TRUE, 'td' => TRUE, 'th' => TRUE,
		'br' => TRUE,
	);

	/**
	 * @param $parent Parent node or null.
	 * @param $qName Tag name (stored lower-cased).
	 * @param $attributes array Attribute name => value (names are lower-cased).
	 */
	function __construct( $parent, $qName, /*array*/ $attributes ) {
		parent::__construct( $parent );
		$this->qName = strtolower( $qName );
		foreach ( $attributes as $key => $value ) {
			$this->attributes[strtolower( $key )] = $value;
		}
		$this->openingTag = '<' . $this->qName;
		foreach ( $this->attributes as $attribute => $value ) {
			$this->openingTag .= ' ' . $attribute . '="' . $value . '"';
		}
		// A constructor's return value is discarded, so nothing is returned.
		$this->openingTag .= '>';
	}

	/**
	 * Inserts $node into the child list at position $index.
	 * Does not touch $node's parent pointer.
	 */
	public function addChildAbsolute( Node $node, $index ) {
		array_splice( $this->children, $index, 0, array( $node ) );
	}

	/**
	 * @return int|null Position of $child among the children (by identity),
	 *                  or NULL when it is not a child.
	 */
	public function getIndexOf( Node $child ) {
		// don't trust array_search with objects
		foreach ( $this->children as $key => $value ) {
			if ( $value === $child ) {
				return $key;
			}
		}
		return NULL;
	}

	public function getNbChildren() {
		return count( $this->children );
	}

	/**
	 * Collects the smallest set of nodes covering everything deleted with
	 * the given modification id beneath this tag.
	 *
	 * @param $id Deletion id to look for.
	 * @param &$allDeleted Set to true when every child reported itself as
	 *        completely deleted (the result is then array($this)).
	 * @param &$somethingDeleted Set to true when at least one child reported
	 *        a deletion.
	 * @return array
	 */
	public function getMinimalDeletedSet( $id, &$allDeleted, &$somethingDeleted ) {
		$allDeleted = false;
		$somethingDeleted = false;
		$nodes = array();

		if ( empty( $this->children ) ) {
			return $nodes;
		}

		$hasNotDeletedDescendant = false;
		foreach ( $this->children as $child ) {
			// Reset per child: a child that reports nothing must not
			// inherit the flags written by the previous child.
			$allDeleted_local = false;
			$somethingDeleted_local = false;
			$childrenChildren = $child->getMinimalDeletedSet( $id, $allDeleted_local, $somethingDeleted_local );
			if ( $somethingDeleted_local ) {
				$nodes = array_merge( $nodes, $childrenChildren );
				$somethingDeleted = true;
			}
			if ( !$allDeleted_local ) {
				$hasNotDeletedDescendant = true;
			}
		}
		if ( !$hasNotDeletedDescendant ) {
			// Everything below is deleted: this tag alone represents the set
			$nodes = array( $this );
			$allDeleted = true;
		}
		return $nodes;
	}

	/**
	 * Splits this tag (and, recursively, its ancestors up to but excluding
	 * $parent) in two around the child $split.
	 *
	 * @param TagNode $parent Ancestor at which splitting stops.
	 * @param Node $split Child around which to split.
	 * @param $includeLeft When true $split ends up in the left half,
	 *        otherwise in the right half.
	 * @return bool True when this tag was actually divided into two
	 *         non-empty halves (results of the recursive calls are not
	 *         propagated).
	 */
	public function splitUntill( TagNode $parent, Node $split, $includeLeft ) {
		$splitOccured = false;
		if ( $parent !== $this ) {
			$part1 = new TagNode( NULL, $this->qName, $this->attributes );
			$part2 = new TagNode( NULL, $this->qName, $this->attributes );
			$part1->setParent( $this->parent );
			$part2->setParent( $this->parent );

			$i = 0;
			$nbChildren = $this->getNbChildren();
			// Everything left of $split goes to the first half
			while ( $i < $nbChildren && $this->children[$i] !== $split ) {
				$this->children[$i]->setParent( $part1 );
				$part1->children[] = $this->children[$i];
				++$i;
			}
			// $split itself, when found
			if ( $i < $nbChildren ) {
				$target = $includeLeft ? $part1 : $part2;
				$this->children[$i]->setParent( $target );
				$target->children[] = $this->children[$i];
				++$i;
			}
			// Everything right of $split goes to the second half
			while ( $i < $nbChildren ) {
				$this->children[$i]->setParent( $part2 );
				$part2->children[] = $this->children[$i];
				++$i;
			}

			// Replace this node in its parent by the non-empty halves, in
			// document order. The node is removed first so that the index
			// still refers to it, and the halves are inserted left to right.
			$myindexinparent = $this->parent->getIndexOf( $this );
			$this->parent->removeChild( $myindexinparent );
			$insertAt = $myindexinparent;
			if ( !empty( $part1->children ) ) {
				$this->parent->addChildAbsolute( $part1, $insertAt );
				++$insertAt;
			}
			if ( !empty( $part2->children ) ) {
				$this->parent->addChildAbsolute( $part2, $insertAt );
			}
			if ( !empty( $part1->children ) && !empty( $part2->children ) ) {
				$splitOccured = true;
			}

			if ( $includeLeft ) {
				$this->parent->splitUntill( $parent, $part1, $includeLeft );
			} else {
				$this->parent->splitUntill( $parent, $part2, $includeLeft );
			}
		}
		return $splitOccured;
	}

	/**
	 * Removes the child at $index and re-indexes the child list.
	 */
	private function removeChild( $index ) {
		unset( $this->children[$index] );
		$this->children = array_values( $this->children );
	}

	/**
	 * @return TagNode Detached deep copy of this tag and its subtree.
	 */
	public function copyTree() {
		$newThis = new TagNode( NULL, $this->qName, $this->attributes );
		$newThis->whiteBefore = $this->whiteBefore;
		$newThis->whiteAfter = $this->whiteAfter;
		foreach ( $this->children as $child ) {
			$newChild = $child->copyTree();
			$newChild->setParent( $newThis );
			$newThis->children[] = $newChild;
		}
		return $newThis;
	}

	/**
	 * Delegates to TextOnlyComparator to score this tag against $other.
	 */
	public function getMatchRatio( TagNode $other ) {
		$txtComp = new TextOnlyComparator( $other );
		return $txtComp->getMatchRatio( new TextOnlyComparator( $this ) );
	}

	/**
	 * Turns the whiteBefore/whiteAfter flags of the children into explicit
	 * WhiteSpaceNode children. Recurses into child tags except PRE.
	 */
	public function expandWhiteSpace() {
		// Number of nodes inserted so far; original child $i now lives at
		// position $i + $shift.
		$shift = 0;
		// True when the previous child already got a trailing space, so the
		// current child does not need a leading one.
		$spaceAdded = false;

		$nbOriginalChildren = $this->getNbChildren();
		for ( $i = 0; $i < $nbOriginalChildren; ++$i ) {
			$child = $this->children[$i + $shift];

			if ( $child instanceof TagNode ) {
				if ( !$child->isPre() ) {
					$child->expandWhiteSpace();
				}
			}
			if ( !$spaceAdded && $child->whiteBefore ) {
				$ws = new WhiteSpaceNode( NULL, ' ', $child->getLeftMostChild() );
				$ws->setParent( $this );
				$this->addChildAbsolute( $ws, $i + ( $shift++ ) );
			}
			if ( $child->whiteAfter ) {
				$ws = new WhiteSpaceNode( NULL, ' ', $child->getRightMostChild() );
				$ws->setParent( $this );
				$this->addChildAbsolute( $ws, $i + 1 + ( $shift++ ) );
				$spaceAdded = true;
			} else {
				$spaceAdded = false;
			}
		}
	}

	/**
	 * @return Node The first leaf below this tag, or the tag itself when
	 *         it has no children.
	 */
	public function getLeftMostChild() {
		if ( empty( $this->children ) ) {
			return $this;
		}
		return $this->children[0]->getLeftMostChild();
	}

	/**
	 * @return Node The last leaf below this tag, or the tag itself when
	 *         it has no children.
	 */
	public function getRightMostChild() {
		if ( empty( $this->children ) ) {
			return $this;
		}
		return $this->children[$this->getNbChildren() - 1]->getRightMostChild();
	}

	/**
	 * @return bool True when this is a PRE tag.
	 */
	public function isPre() {
		return 0 == strcasecmp( $this->qName, 'pre' );
	}

	/**
	 * @return string The opening tag of $node.
	 */
	public static function toDiffLine( TagNode $node ) {
		return $node->openingTag;
	}
}
/**
* Represents a piece of text in the HTML file .
*/
class TextNode extends Node {

	/** The text carried by this leaf. */
	public $text;

	/** Modification object describing how this leaf changed. */
	public $modification;

	/**
	 * @param $parent Parent node or null.
	 * @param $text The text of this leaf.
	 */
	function __construct( $parent, $text ) {
		parent::__construct( $parent );
		$this->modification = new Modification( Modification::NONE );
		$this->text = $text;
	}

	/**
	 * @return TextNode Detached shallow clone (the Modification object is
	 *         shared with the original).
	 */
	public function copyTree() {
		$clone = clone $this;
		$clone->setParent( NULL );
		return $clone;
	}

	public function getLeftMostChild() {
		return $this;
	}

	public function getRightMostChild() {
		return $this;
	}

	/**
	 * Reports whether this leaf was removed under deletion id $id.
	 *
	 * Both out-parameters are always assigned, so a caller that reuses the
	 * same variables for several nodes never sees a stale value.
	 *
	 * @param $id Deletion id to look for.
	 * @param &$allDeleted Set to true when this leaf is deleted, else false.
	 * @param &$somethingDeleted Set to true when this leaf is deleted, else false.
	 * @return array array($this) when deleted, otherwise an empty array.
	 */
	public function getMinimalDeletedSet( $id, &$allDeleted, &$somethingDeleted ) {
		$isDeleted = $this->modification->type == Modification::REMOVED
			&& $this->modification->id == $id;
		$allDeleted = $isDeleted;
		$somethingDeleted = $isDeleted;
		return $isDeleted ? array( $this ) : array();
	}

	/**
	 * Compares the text of two leaves, treating a newline like a space.
	 *
	 * @param $other Anything; only TextNode instances can match.
	 * @return bool
	 */
	public function isSameText( $other ) {
		if ( !( $other instanceof TextNode ) ) {
			return false;
		}
		// "\n" must be double-quoted: '\n' is a backslash followed by "n".
		return str_replace( "\n", ' ', $this->text ) === str_replace( "\n", ' ', $other->text );
	}

	/**
	 * @return string The text of $node with newlines replaced by spaces.
	 */
	public static function toDiffLine( TextNode $node ) {
		return str_replace( "\n", ' ', $node->text );
	}
}
/**
 * A text leaf created for whitespace. It can take over the modification
 * of a neighbouring text leaf.
 */
class WhiteSpaceNode extends TextNode {

	/**
	 * @param $parent Parent node or null.
	 * @param $s The whitespace text.
	 * @param Node $like Optional node whose modification is copied when it
	 *        is a TextNode; the copy never counts as first of its id.
	 */
	function __construct( $parent, $s, Node $like = NULL ) {
		parent::__construct( $parent, $s );
		// instanceof is false for NULL, so no separate null check is needed
		if ( $like instanceof TextNode ) {
			$copiedModification = clone $like->modification;
			$copiedModification->firstOfID = false;
			$this->modification = $copiedModification;
		}
	}
}
/**
* Represents the root of a HTML document .
*/
class BodyNode extends TagNode {

	/**
	 * Creates a parentless 'body' tag without attributes.
	 */
	function __construct() {
		parent::__construct( NULL, 'body', array() );
	}

	/**
	 * @return BodyNode Deep copy of the body and its whole subtree.
	 */
	public function copyTree() {
		$copy = new BodyNode();
		foreach ( $this->children as $child ) {
			$childCopy = $child->copyTree();
			$childCopy->setParent( $copy );
			$copy->children[] = $childCopy;
		}
		return $copy;
	}

	/**
	 * Concatenates the minimal deleted sets of all children. Unlike
	 * TagNode, the body never returns itself as the deleted set.
	 *
	 * @param $id Deletion id to look for.
	 * @param &$allDeleted Passed through to every child.
	 * @param &$somethingDeleted Passed through to every child.
	 * @return array
	 */
	public function getMinimalDeletedSet( $id, &$allDeleted, &$somethingDeleted ) {
		$nodes = array();
		foreach ( $this->children as $child ) {
			$nodes = array_merge(
				$nodes,
				$child->getMinimalDeletedSet( $id, $allDeleted, $somethingDeleted )
			);
		}
		return $nodes;
	}
}
/**
* Represents an image in HTML . Even though images do not contain any text they
* are independent visible objects on the page . They are logically a TextNode .
*/
class ImageNode extends TextNode {

	/** Attributes of the IMG tag, as passed to the constructor. */
	private $attributes;

	/**
	 * The text of an image leaf is '<img>' . lower-cased src . '</img>',
	 * or '<img></img>' when there is no 'src' attribute.
	 *
	 * @param TagNode $parent The parent tag.
	 * @param $attrs array Attributes of the image.
	 */
	function __construct( TagNode $parent, /*array*/ $attrs ) {
		if ( array_key_exists( 'src', $attrs ) ) {
			$text = '<img>' . strtolower( $attrs['src'] ) . '</img>';
		} else {
			// Image without a source
			$text = '<img></img>';
		}
		parent::__construct( $parent, $text );
		$this->attributes = $attrs;
	}

	/**
	 * @param $other Anything; only ImageNode instances can match.
	 * @return bool True when both images carry exactly the same text.
	 */
	public function isSameText( $other ) {
		if ( !( $other instanceof ImageNode ) ) {
			return false;
		}
		return $this->text === $other->text;
	}
}
/**
 * Placeholder node. DomTreeBuilder uses it as "last sibling" when there is
 * no real sibling, so whitespace flags can be written without a null check.
 */
class DummyNode extends Node {

	/**
	 * Deliberately does not call the parent constructor; $parent keeps its
	 * default value.
	 */
	function __construct() {
		// no op
	}
}
/**
* When detecting the last common parent of two nodes , all results are stored as
* a LastCommonParentResult .
*/
class LastCommonParentResult {

	/** The last common parent that was found. */
	public $parent;

	/** Whether the tree has to be split below the common parent. */
	public $splittingNeeded = false;

	/** Depth of the common parent in the ancestor list; -1 when unset. */
	public $lastCommonParentDepth = -1;

	/** Index of the relevant child within the common parent; -1 when unset. */
	public $indexInLastCommonParent = -1;
}
/**
 * Describes how a text leaf changed between the two documents.
 * Modifications are chained through $prevMod / $nextMod.
 */
class Modification {

	const NONE = 1;
	const REMOVED = 2;
	const ADDED = 4;
	const CHANGED = 8;

	/** One of the class constants. */
	public $type;

	/** Id shared by all modifications of the same group; -1 when unset. */
	public $id = -1;

	/** Previous modification in the chain. */
	public $prevMod;

	/** Next modification in the chain. */
	public $nextMod;

	/** True for the first modification carrying a given id. */
	public $firstOfID = false;

	/** Change description, set for CHANGED modifications. */
	public $changes;

	/**
	 * @param $type One of the class constants.
	 */
	function __construct( $type ) {
		$this->type = $type;
	}

	/**
	 * @param $type One of the class constants.
	 * @return string|null Lower-case name of the type, or null for an
	 *         unknown value.
	 */
	public static function typeToString( $type ) {
		switch ( $type ) {
			case self::NONE:
				return 'none';
			case self::REMOVED:
				return 'removed';
			case self::ADDED:
				return 'added';
			case self::CHANGED:
				return 'changed';
		}
		return null;
	}
}
/**
 * Builds a Node tree from XML parser callbacks (startElement, endElement,
 * characters) and records every text leaf in document order.
 */
class DomTreeBuilder {

	/** Matches one word delimiter; used to split character data. */
	const regex = '/([\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1})/';

	/** Matches a token that is exactly one whitespace character. */
	const whitespace = '/^[\s]{1}$/';

	/** Matches a token that is exactly one delimiter character. */
	const delimiter = '/^[\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1}$/';

	/** All text leaves (TextNode / ImageNode) in document order. */
	public $textNodes = array();

	/** Root of the tree. */
	public $bodyNode;

	/** Tag that currently receives new children. */
	private $currentParent;

	/** Characters of the word collected so far. */
	private $newWord = '';

	protected $bodyStarted = false;

	protected $bodyEnded = false;

	/** True when whitespace was seen since the last node was created. */
	private $whiteSpaceBeforeThis = false;

	/** Node that gets whiteAfter set when whitespace follows. */
	private $lastSibling;

	/** False while inside a PRE element. */
	private $notInPre = true;

	function __construct() {
		$this->bodyNode = $this->currentParent = new BodyNode();
		$this->lastSibling = new DummyNode();
	}

	/**
	 * Flushes the pending word. Must be called manually
	 * (endElement() does so when the BODY element ends).
	 */
	public function endDocument() {
		$this->endWord();
		//wfDebug(sizeof($this->textNodes) . ' text nodes in document.');
	}

	/**
	 * XML parser callback for an opening tag. BODY tags are ignored
	 * because the root BodyNode already exists.
	 */
	public function startElement( $parser, $name, /*array*/ $attributes ) {
		if ( strcasecmp( $name, 'body' ) == 0 ) {
			return;
		}
		//wfDebug("Starting $name node.");
		$this->endWord();

		$newNode = new TagNode( $this->currentParent, $name, $attributes );
		$this->currentParent->children[] = $newNode;
		$this->currentParent = $newNode;
		$this->lastSibling = new DummyNode();

		// Only non-block tags remember the whitespace in front of them
		if ( $this->whiteSpaceBeforeThis
			&& !array_key_exists( strtolower( $this->currentParent->qName ), TagNode::$blocks )
		) {
			$this->currentParent->whiteBefore = true;
		}
		$this->whiteSpaceBeforeThis = false;

		if ( strcasecmp( $name, 'pre' ) == 0 ) {
			$this->notInPre = false;
		}
	}

	/**
	 * XML parser callback for a closing tag. The closing BODY tag ends
	 * the document.
	 */
	public function endElement( $parser, $name ) {
		if ( strcasecmp( $name, 'body' ) == 0 ) {
			$this->endDocument();
			return;
		}
		//wfDebug("Ending $name node.");
		if ( 0 == strcasecmp( $name, 'img' ) ) {
			// Insert a dummy leaf for the image
			$img = new ImageNode( $this->currentParent, $this->currentParent->attributes );
			$this->currentParent->children[] = $img;
			$img->whiteBefore = $this->whiteSpaceBeforeThis;
			$this->lastSibling = $img;
			$this->textNodes[] = $img;
		}
		$this->endWord();

		if ( !array_key_exists( strtolower( $this->currentParent->qName ), TagNode::$blocks ) ) {
			$this->lastSibling = $this->currentParent;
		} else {
			$this->lastSibling = new DummyNode();
		}
		$this->currentParent = $this->currentParent->parent;
		$this->whiteSpaceBeforeThis = false;

		if ( !$this->notInPre && strcasecmp( $name, 'pre' ) == 0 ) {
			$this->notInPre = true;
		}
	}

	/**
	 * XML parser callback for character data. Splits the data on
	 * delimiters: whitespace outside PRE only sets flags, every other
	 * delimiter becomes its own text leaf, the rest is collected into words.
	 */
	public function characters( $parser, $data ) {
		$matches = preg_split( self::regex, $data, -1, PREG_SPLIT_DELIM_CAPTURE );
		foreach ( $matches as $word ) {
			if ( preg_match( self::whitespace, $word ) && $this->notInPre ) {
				$this->endWord();
				$this->lastSibling->whiteAfter = true;
				$this->whiteSpaceBeforeThis = true;
			} else if ( preg_match( self::delimiter, $word ) ) {
				$this->endWord();
				$textNode = new TextNode( $this->currentParent, $word );
				$this->currentParent->children[] = $textNode;
				$textNode->whiteBefore = $this->whiteSpaceBeforeThis;
				$this->whiteSpaceBeforeThis = false;
				$this->lastSibling = $textNode;
				$this->textNodes[] = $textNode;
			} else {
				$this->newWord .= $word;
			}
		}
	}

	/**
	 * Turns the collected word, if any, into a text leaf.
	 */
	private function endWord() {
		// Compare against '' explicitly: empty() would also skip the
		// word "0" and glue it onto the following word.
		if ( $this->newWord !== '' ) {
			$node = new TextNode( $this->currentParent, $this->newWord );
			$this->currentParent->children[] = $node;
			$node->whiteBefore = $this->whiteSpaceBeforeThis;
			$this->whiteSpaceBeforeThis = false;
			$this->lastSibling = $node;
			$this->textNodes[] = $node;
			$this->newWord = '';
		}
	}

	/**
	 * @return array One diff line (see TextNode::toDiffLine) per text leaf.
	 */
	public function getDiffLines() {
		return array_map( array( 'TextNode', 'toDiffLine' ), $this->textNodes );
	}
}
/**
 * Annotates the text leaves of the new tree with Modification objects and
 * copies deleted parts of the old tree into the new tree.
 */
class TextNodeDiffer {

	/** Text leaves of the new document. */
	private $textNodes;

	/** Root of the new document; receives the copied deleted nodes. */
	public $bodyNode;

	/** Text leaves of the old document. */
	private $oldTextNodes;

	/** Root of the old document. */
	private $oldBodyNode;

	/** Modifications created by the most recent marking step. */
	private $lastModified = array();

	/** Id handed out to the next group of added leaves. */
	private $newID = 0;

	/** Id handed out to the current group of changed leaves. */
	private $changedID = 0;

	/** True once $changedID has been assigned to at least one leaf. */
	private $changedIDUsed = false;

	/** Changes of the most recently handled changed leaf. */
	private $changes;

	// used to remove the whitespace between a red and green block
	private $whiteAfterLastChangedPart = false;

	/** Id handed out to the next group of deleted leaves. */
	private $deletedID = 0;

	/**
	 * @param DomTreeBuilder $tree Tree of the new document.
	 * @param DomTreeBuilder $oldTree Tree of the old document.
	 */
	function __construct( DomTreeBuilder $tree, DomTreeBuilder $oldTree ) {
		$this->textNodes = $tree->textNodes;
		$this->bodyNode = $tree->bodyNode;
		$this->oldTextNodes = $oldTree->textNodes;
		$this->oldBodyNode = $oldTree->bodyNode;
	}

	/**
	 * Chains $mod behind the modifications of the previous marking step:
	 * $mod points back at the first of them, and all of them point forward
	 * to $mod unless a successor was already set. Does nothing when there
	 * is no previous step.
	 */
	private function linkToLastModified( $mod ) {
		if ( sizeof( $this->lastModified ) > 0 ) {
			$mod->prevMod = $this->lastModified[0];
			if ( is_null( $this->lastModified[0]->nextMod ) ) {
				foreach ( $this->lastModified as $lastMod ) {
					$lastMod->nextMod = $mod;
				}
			}
		}
	}

	/**
	 * Marks the new text leaves [$start, $end) as added.
	 */
	public function markAsNew( $start, $end ) {
		if ( $end <= $start ) {
			return;
		}
		if ( $this->whiteAfterLastChangedPart ) {
			$this->textNodes[$start]->whiteBefore = false;
		}

		$nextLastModified = array();
		for ( $i = $start; $i < $end; ++$i ) {
			$mod = new Modification( Modification::ADDED );
			$mod->id = $this->newID;
			$this->linkToLastModified( $mod );
			$nextLastModified[] = $mod;
			$this->textNodes[$i]->modification = $mod;
		}
		// $start < $end is guaranteed by the guard above
		$this->textNodes[$start]->modification->firstOfID = true;

		++$this->newID;
		$this->lastModified = $nextLastModified;
	}

	/**
	 * Walks over a range of leaves whose text is equal in both documents
	 * and marks those whose ancestors differ as changed.
	 *
	 * @param $leftstart First index in the old leaves.
	 * @param $leftend Unused.
	 * @param $rightstart First index in the new leaves.
	 * @param $rightend End index (exclusive) in the new leaves.
	 */
	public function handlePossibleChangedPart( $leftstart, $leftend, $rightstart, $rightend ) {
		$i = $rightstart;
		$j = $leftstart;

		if ( $this->changedIDUsed ) {
			++$this->changedID;
			$this->changedIDUsed = false;
		}

		$nextLastModified = array();

		while ( $i < $rightend ) {
			$acthis = new AncestorComparator( $this->textNodes[$i]->getParentTree() );
			$acother = new AncestorComparator( $this->oldTextNodes[$j]->getParentTree() );
			$result = $acthis->getResult( $acother );
			unset( $acthis, $acother );

			// Taken before $this->lastModified may be replaced below
			$nbLastModified = sizeof( $this->lastModified );

			if ( $result->changed ) {
				$mod = new Modification( Modification::CHANGED );

				if ( !$this->changedIDUsed ) {
					$mod->firstOfID = true;
					if ( sizeof( $nextLastModified ) > 0 ) {
						$this->lastModified = $nextLastModified;
						$nextLastModified = array();
					}
				} else if ( !is_null( $result->changes ) && $result->changes !== $this->changes ) {
					// A different kind of change starts a new group
					++$this->changedID;
					$mod->firstOfID = true;
					if ( sizeof( $nextLastModified ) > 0 ) {
						$this->lastModified = $nextLastModified;
						$nextLastModified = array();
					}
				}

				if ( $nbLastModified > 0 ) {
					$this->linkToLastModified( $mod );
				}
				$nextLastModified[] = $mod;

				$mod->changes = $result->changes;
				$mod->id = $this->changedID;

				$this->textNodes[$i]->modification = $mod;
				$this->changes = $result->changes;
				$this->changedIDUsed = true;
			} else if ( $this->changedIDUsed ) {
				++$this->changedID;
				$this->changedIDUsed = false;
			}
			++$i;
			++$j;
		}

		if ( sizeof( $nextLastModified ) > 0 ) {
			$this->lastModified = $nextLastModified;
		}
	}

	/**
	 * Marks the old text leaves [$start, $end) as removed and copies the
	 * minimal set of nodes covering them into the new tree.
	 *
	 * @param $start First index in the old leaves.
	 * @param $end End index (exclusive) in the old leaves.
	 * @param $before Index in the new leaves in front of which the removed
	 *        content belongs.
	 */
	public function markAsDeleted( $start, $end, $before ) {
		if ( $end <= $start ) {
			return;
		}

		$this->whiteAfterLastChangedPart =
			( $before > 0 && $this->textNodes[$before - 1]->whiteAfter ) ? true : false;

		$nextLastModified = array();
		for ( $i = $start; $i < $end; ++$i ) {
			$mod = new Modification( Modification::REMOVED );
			$mod->id = $this->deletedID;
			$this->linkToLastModified( $mod );
			$nextLastModified[] = $mod;

			// oldTextNodes is used here because we're going to move its deleted
			// elements to this tree!
			$this->oldTextNodes[$i]->modification = $mod;
		}
		$this->oldTextNodes[$start]->modification->firstOfID = true;

		$root = $this->oldTextNodes[$start]->getLastCommonParent( $this->oldTextNodes[$end - 1] )->parent;

		$deletedNodes = $root->getMinimalDeletedSet( $this->deletedID, $junk1, $junk2 );
		//wfDebug("Minimal set of deleted nodes of size " . sizeof($deletedNodes));

		// Set prevLeaf to the leaf after which the old HTML needs to be
		// inserted
		if ( $before > 0 ) {
			$prevLeaf = $this->textNodes[$before - 1];
		}
		// Set nextLeaf to the leaf before which the old HTML needs to be
		// inserted
		if ( $before < sizeof( $this->textNodes ) ) {
			$nextLeaf = $this->textNodes[$before];
		}

		while ( sizeof( $deletedNodes ) > 0 ) {
			$lastIndex = sizeof( $deletedNodes ) - 1;

			if ( isset( $prevLeaf ) ) {
				$prevResult = $prevLeaf->getLastCommonParent( $deletedNodes[0] );
			} else {
				$prevResult = new LastCommonParentResult();
				$prevResult->parent = $this->bodyNode;
				$prevResult->indexInLastCommonParent = 0;
			}
			if ( isset( $nextLeaf ) ) {
				$nextResult = $nextLeaf->getLastCommonParent( $deletedNodes[$lastIndex] );
			} else {
				$nextResult = new LastCommonParentResult();
				$nextResult->parent = $this->bodyNode;
				$nextResult->indexInLastCommonParent = $this->bodyNode->getNbChildren();
			}

			if ( $prevResult->lastCommonParentDepth == $nextResult->lastCommonParentDepth ) {
				// We need some metric to choose which way to add-...
				if ( $deletedNodes[0]->parent === $deletedNodes[$lastIndex]->parent
					&& $prevResult->parent === $nextResult->parent
				) {
					// The difference is not in the parent
					$prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
				} else {
					// The difference is in the parent, so compare them
					// now THIS is tricky
					$distancePrev = $deletedNodes[0]->parent->getMatchRatio( $prevResult->parent );
					$distanceNext = $deletedNodes[$lastIndex]->parent->getMatchRatio( $nextResult->parent );

					if ( $distancePrev <= $distanceNext ) {
						$prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
					} else {
						$nextResult->lastCommonParentDepth = $nextResult->lastCommonParentDepth + 1;
					}
				}
			}

			if ( $prevResult->lastCommonParentDepth > $nextResult->lastCommonParentDepth ) {
				// Inserting at the front
				if ( $prevResult->splittingNeeded ) {
					$prevLeaf->parent->splitUntill( $prevResult->parent, $prevLeaf, true );
				}
				// $deletedNodes is a plain list, so array_shift keeps it 0-based
				$prevLeaf = array_shift( $deletedNodes )->copyTree();
				$prevLeaf->setParent( $prevResult->parent );
				$prevResult->parent->addChildAbsolute( $prevLeaf, $prevResult->indexInLastCommonParent + 1 );
			} else if ( $prevResult->lastCommonParentDepth < $nextResult->lastCommonParentDepth ) {
				// Inserting at the back
				if ( $nextResult->splittingNeeded ) {
					$splitOccured = $nextLeaf->parent->splitUntill( $nextResult->parent, $nextLeaf, false );
					if ( $splitOccured ) {
						// The place where to insert is shifted one place to the
						// right
						$nextResult->indexInLastCommonParent = $nextResult->indexInLastCommonParent + 1;
					}
				}
				$nextLeaf = array_pop( $deletedNodes )->copyTree();
				$nextLeaf->setParent( $nextResult->parent );
				$nextResult->parent->addChildAbsolute( $nextLeaf, $nextResult->indexInLastCommonParent );
			} else {
				throw new Exception( " Uh? " );
			}
		}
		$this->lastModified = $nextLastModified;
		++$this->deletedID;
	}

	/**
	 * Expands the whitespace flags of the new tree into whitespace nodes.
	 */
	public function expandWhiteSpace() {
		$this->bodyNode->expandWhiteSpace();
	}

	/**
	 * @return int Number of text leaves in the new document.
	 */
	public function lengthNew() {
		return sizeof( $this->textNodes );
	}

	/**
	 * @return int Number of text leaves in the old document.
	 */
	public function lengthOld() {
		return sizeof( $this->oldTextNodes );
	}
}
class HTMLDiffer {
2008-08-18 00:16:32 +00:00
2008-08-15 11:46:46 +00:00
private $output ;
2008-08-18 00:16:32 +00:00
2008-08-15 11:46:46 +00:00
/**
 * @param $output Stored as-is; htmlDiff() passes it on to HTMLOutput.
 */
function __construct( $output ) {
	$this->output = $output;
}
/**
 * Diffs two HTML fragments and sends the annotated result to the output
 * handed to the constructor.
 *
 * Each fragment is wrapped in a BODY element and parsed into a
 * DomTreeBuilder; the text leaves are diffed with WikiDiff3 and the
 * resulting ranges are applied to the tree of $to by a TextNodeDiffer.
 * Parse errors are only logged through wfDebug().
 *
 * @param $from string The old HTML.
 * @param $to string The new HTML.
 */
function htmlDiff( $from, $to ) {
	wfProfileIn( __METHOD__ );

	// Parse the old document
	$xml_parser = xml_parser_create( '' );
	$domfrom = new DomTreeBuilder();
	// The handler names must match the DomTreeBuilder methods exactly;
	// padded names would refer to methods that do not exist.
	xml_set_element_handler( $xml_parser, array( $domfrom, 'startElement' ), array( $domfrom, 'endElement' ) );
	xml_set_character_data_handler( $xml_parser, array( $domfrom, 'characters' ) );

	//wfDebug('Parsing '.strlen($from)." characters worth of HTML\n");
	if ( !xml_parse( $xml_parser, '<?xml version="1.0" encoding="UTF-8"?>' . Sanitizer::hackDocType() . '<body>', FALSE )
		|| !xml_parse( $xml_parser, $from, FALSE )
		|| !xml_parse( $xml_parser, '</body>', TRUE )
	) {
		wfDebug( sprintf( " XML error: %s at line %d \n ", xml_error_string( xml_get_error_code( $xml_parser ) ), xml_get_current_line_number( $xml_parser ) ) );
	}
	xml_parser_free( $xml_parser );
	unset( $from );

	// Parse the new document
	$xml_parser = xml_parser_create( '' );
	$domto = new DomTreeBuilder();
	xml_set_element_handler( $xml_parser, array( $domto, 'startElement' ), array( $domto, 'endElement' ) );
	xml_set_character_data_handler( $xml_parser, array( $domto, 'characters' ) );

	//wfDebug('Parsing '.strlen($to)." characters worth of HTML\n");
	if ( !xml_parse( $xml_parser, '<?xml version="1.0" encoding="UTF-8"?>' . Sanitizer::hackDocType() . '<body>', FALSE )
		|| !xml_parse( $xml_parser, $to, FALSE )
		|| !xml_parse( $xml_parser, '</body>', TRUE )
	) {
		wfDebug( sprintf( " XML error in HTML diff: %s at line %d \n ", xml_error_string( xml_get_error_code( $xml_parser ) ), xml_get_current_line_number( $xml_parser ) ) );
	}
	xml_parser_free( $xml_parser );
	unset( $to );

	// Diff the flat lists of text leaves
	$diffengine = new WikiDiff3();
	$differences = $this->preProcess( $diffengine->diff_range( $domfrom->getDiffLines(), $domto->getDiffLines() ) );
	unset( $xml_parser, $diffengine );

	// Apply the ranges: "left" indexes the old leaves, "right" the new ones
	$domdiffer = new TextNodeDiffer( $domto, $domfrom );

	$currentIndexLeft = 0;
	$currentIndexRight = 0;
	foreach ( $differences as $d ) {
		if ( $d->leftstart > $currentIndexLeft ) {
			// Leaves with equal text between two differences
			$domdiffer->handlePossibleChangedPart( $currentIndexLeft, $d->leftstart,
				$currentIndexRight, $d->rightstart );
		}
		if ( $d->leftlength > 0 ) {
			$domdiffer->markAsDeleted( $d->leftstart, $d->leftend, $d->rightstart );
		}
		$domdiffer->markAsNew( $d->rightstart, $d->rightend );

		$currentIndexLeft = $d->leftend;
		$currentIndexRight = $d->rightend;
	}

	// Leaves with equal text after the last difference
	$oldLength = $domdiffer->lengthOld();
	if ( $currentIndexLeft < $oldLength ) {
		$domdiffer->handlePossibleChangedPart( $currentIndexLeft, $oldLength, $currentIndexRight, $domdiffer->lengthNew() );
	}

	$domdiffer->expandWhiteSpace();
	$output = new HTMLOutput( 'htmldiff', $this->output );
	$output->parse( $domdiffer->bodyNode );
	wfProfileOut( __METHOD__ );
}
private function preProcess ( /*array*/ $differences ){
$newRanges = array ();
$nbDifferences = sizeof ( $differences );
for ( $i = 0 ; $i < $nbDifferences ; ++ $i ) {
$leftStart = $differences [ $i ] -> leftstart ;
$leftEnd = $differences [ $i ] -> leftend ;
$rightStart = $differences [ $i ] -> rightstart ;
$rightEnd = $differences [ $i ] -> rightend ;
$leftLength = $leftEnd - $leftStart ;
$rightLength = $rightEnd - $rightStart ;
while ( $i + 1 < $nbDifferences && self :: score ( $leftLength , $differences [ $i + 1 ] -> leftlength ,
$rightLength , $differences [ $i + 1 ] -> rightlength ) > ( $differences [ $i + 1 ] -> leftstart - $leftEnd )) {
$leftEnd = $differences [ $i + 1 ] -> leftend ;
$rightEnd = $differences [ $i + 1 ] -> rightend ;
$leftLength = $leftEnd - $leftStart ;
$rightLength = $rightEnd - $rightStart ;
++ $i ;
}
$newRanges [] = new RangeDifference ( $leftStart , $leftEnd , $rightStart , $rightEnd );
}
return $newRanges ;
}
/**
* Heuristic to merge differences for readability .
*/
public static function score ( $ll , $nll , $rl , $nrl ) {
if (( $ll == 0 && $nll == 0 )
|| ( $rl == 0 && $nrl == 0 )){
return 0 ;
}
$numbers = array ( $ll , $nll , $rl , $nrl );
$d = 0 ;
foreach ( $numbers as $number ) {
while ( $number > 3 ) {
$d += 3 ;
$number -= 3 ;
$number *= 0.5 ;
}
$d += $number ;
}
return $d / ( 1.5 * sizeof ( $numbers ));
}
}
/**
 * Compares two tag trees by looking only at the text leaves they contain.
 */
class TextOnlyComparator {

    /**
     * Result of TextNode::toDiffLine() for every TextNode below the tree,
     * collected depth-first in child order.
     */
    public $leafs = array();

    /**
     * @param $tree TagNode root of the branch whose text leaves are collected;
     *   may be omitted to create a comparator without leaves
     */
    function __construct(TagNode $tree = null) {
        if (!is_null($tree)) {
            $this->_construct($tree);
        }
    }

    /**
     * Collects the text leaves of the given tree.
     *
     * Kept as a public method for callers that initialise the comparator
     * explicitly. Previously collected leaves are discarded first, so calling
     * it after construction does not duplicate entries.
     *
     * @param $tree TagNode root of the branch to collect from
     */
    function _construct(TagNode $tree) {
        $this->leafs = array();
        $this->addRecursive($tree);
        $this->leafs = array_map(array('TextNode', 'toDiffLine'), $this->leafs);
    }

    /**
     * Appends every TextNode found below $tree to $this->leafs.
     */
    private function addRecursive(TagNode $tree) {
        foreach ($tree->children as $child) {
            if ($child instanceof TagNode) {
                $this->addRecursive($child);
            } else if ($child instanceof TextNode) {
                $this->leafs[] = $child;
            }
        }
    }

    /**
     * Computes how different the text of two trees is.
     *
     * @param $other TextOnlyComparator comparator to match against
     * @return float INF when either side has no leaves; otherwise
     *   (2 - lcs/|other| - lcs/|this|) / 2, where lcs is the LCS length
     *   reported by WikiDiff3, i.e. 0 for identical leaf lists and 1 when
     *   nothing matches
     */
    public function getMatchRatio(TextOnlyComparator $other) {
        $nbOthers = count($other->leafs);
        $nbThis = count($this->leafs);
        if ($nbOthers == 0 || $nbThis == 0) {
            return INF;
        }

        // NOTE(review): meaning of the WikiDiff3 constructor arguments is not
        // visible here - confirm against WikiDiff3.
        $diffengine = new WikiDiff3(25000, 1.35);
        $diffengine->diff($this->leafs, $other->leafs);
        $lcsLength = $diffengine->getLcsLength();

        return (2.0 - $lcsLength / $nbOthers - $lcsLength / $nbThis) / 2.0;
    }
}
2008-08-15 11:46:46 +00:00
2008-08-18 00:16:32 +00:00
/**
 * Plain value holder for the outcome of an ancestor comparison.
 */
class AncestorComparatorResult {

    /** Textual description of the changes. */
    public $changes = " ";

    /** Flag telling whether a change was detected; false by default. */
    public $changed = false;
}
/**
 * Compares the ancestor chains of two nodes and describes how they differ.
 */
class AncestorComparator {

    /** The ancestor nodes as handed to the constructor. */
    public $ancestors;

    /** Result of TagNode::toDiffLine() for each ancestor, in the same order. */
    public $ancestorsText;

    public $compareTxt = " ";

    /**
     * @param $ancestors array list of TagNode ancestors
     */
    function __construct(/*array*/ $ancestors) {
        $this->ancestors = $ancestors;
        $this->ancestorsText = array_map(array('TagNode', 'toDiffLine'), $ancestors);
    }

    /**
     * Diffs this ancestor chain against another one.
     *
     * @param $other AncestorComparator chain to compare with
     * @return AncestorComparatorResult untouched defaults when the chains are
     *   equal; otherwise changed is true and changes holds the description
     *   produced by ChangeTextGenerator
     */
    public function getResult(AncestorComparator $other) {
        $outcome = new AncestorComparatorResult();

        $engine = new WikiDiff3(10000, 1.35);
        $ranges = $engine->diff_range($this->ancestorsText, $other->ancestorsText);

        if (count($ranges) != 0) {
            $generator = new ChangeTextGenerator($this, $other);
            $outcome->changed = true;
            $outcome->changes = $generator->getChanged($ranges)->toString();
        }

        return $outcome;
    }
}
/**
 * Turns the range differences between two ancestor chains into an HTML
 * description of the tags that were removed and added.
 */
class ChangeTextGenerator {

    /** AncestorComparator holding the new ancestors (right side of a diff). */
    private $new;

    /** AncestorComparator holding the old ancestors (left side of a diff). */
    private $old;

    /** TagToStringFactory used to describe individual tags. */
    private $factory;

    function __construct(AncestorComparator $old, AncestorComparator $new) {
        $this->new = $new;
        $this->old = $old;
        $this->factory = new TagToStringFactory();
    }

    /**
     * Builds the description for a list of range differences.
     *
     * More than one difference is wrapped in an outer change list with one
     * item per difference. A difference touching more than one tag gets its
     * own nested change list with one item per tag. Old (left) tags are
     * described before new (right) tags.
     *
     * @param $differences array zero-indexed list of range differences
     * @return ChangeText the generated description
     */
    public function getChanged(/*array*/ $differences) {
        $txt = new ChangeText;

        $total = count($differences);
        $outerList = $total > 1;
        if ($outerList) {
            $txt->addHtml('<ul class="changelist">');
        }

        for ($n = 0; $n < $total; ++$n) {
            $diff = $differences[$n];

            if ($outerList) {
                $txt->addHtml('<li>');
            }

            $innerList = ($diff->leftlength + $diff->rightlength) > 1;
            if ($innerList) {
                $txt->addHtml('<ul class="changelist">');
            }

            // tags that disappeared from the old ancestry
            for ($pos = $diff->leftstart; $pos < $diff->leftend; ++$pos) {
                if ($innerList) {
                    $txt->addHtml('<li>');
                }
                $this->addTagOld($txt, $this->old->ancestors[$pos]);
                if ($innerList) {
                    $txt->addHtml('</li>');
                }
            }

            // tags that appeared in the new ancestry
            for ($pos = $diff->rightstart; $pos < $diff->rightend; ++$pos) {
                if ($innerList) {
                    $txt->addHtml('<li>');
                }
                $this->addTagNew($txt, $this->new->ancestors[$pos]);
                if ($innerList) {
                    $txt->addHtml('</li>');
                }
            }

            if ($innerList) {
                $txt->addHtml('</ul>');
            }
            if ($outerList) {
                $txt->addHtml('</li>');
            }
        }

        if ($outerList) {
            $txt->addHtml('</ul>');
        }
        return $txt;
    }

    /** Appends the "removed" description of an old ancestor. */
    private function addTagOld(ChangeText $txt, TagNode $ancestor) {
        $describer = $this->factory->create($ancestor);
        $describer->getRemovedDescription($txt);
    }

    /** Appends the "added" description of a new ancestor. */
    private function addTagNew(ChangeText $txt, TagNode $ancestor) {
        $describer = $this->factory->create($ancestor);
        $describer->getAddedDescription($txt);
    }
}
/**
 * Accumulates an HTML snippet from escaped text and raw markup.
 */
class ChangeText {

    /** Markup appended by addNewLine(). */
    const newLine = " <br/> ";

    /** The snippet built so far. */
    private $txt = " ";

    /**
     * Appends plain text; it is passed through htmlspecialchars() first.
     */
    public function addText($s) {
        $this->txt .= $this->clean($s);
    }

    /**
     * Appends markup verbatim, without any escaping.
     */
    public function addHtml($s) {
        $this->txt = $this->txt . $s;
    }

    /**
     * Appends the newLine markup.
     */
    public function addNewLine() {
        $this->addHtml(self::newLine);
    }

    /**
     * @return string everything appended so far
     */
    public function toString() {
        return $this->txt;
    }

    /** Escapes HTML special characters in $s. */
    private function clean($s) {
        $escaped = htmlspecialchars($s);
        return $escaped;
    }
}
/**
 * Chooses the describer class and the change semantic for a tag.
 */
class TagToStringFactory {

    /** Semantic for tags listed in $containerTags. */
    const MOVED = 1;

    /** Semantic for tags listed in $styleTags. */
    const STYLE = 2;

    /** Semantic for every tag in neither list. */
    const UNKNOWN = 4;

    /** Lower-case names of the tags treated as containers. */
    private static $containerTags = array(
        'html' => TRUE,
        'body' => TRUE,
        'p' => TRUE,
        'blockquote' => TRUE,
        'h1' => TRUE,
        'h2' => TRUE,
        'h3' => TRUE,
        'h4' => TRUE,
        'h5' => TRUE,
        'pre' => TRUE,
        'div' => TRUE,
        'ul' => TRUE,
        'ol' => TRUE,
        'li' => TRUE,
        'table' => TRUE,
        'tbody' => TRUE,
        'tr' => TRUE,
        'td' => TRUE,
        'th' => TRUE,
        'br' => TRUE,
        'hr' => TRUE,
        'code' => TRUE,
        'dl' => TRUE,
        'dt' => TRUE,
        'dd' => TRUE,
        'input' => TRUE,
        'form' => TRUE,
        'img' => TRUE,
        // in-line tags that can be considered containers not styles
        'span' => TRUE,
        'a' => TRUE
    );

    /** Lower-case names of the tags treated as pure styling. */
    private static $styleTags = array(
        'i' => TRUE,
        'b' => TRUE,
        'strong' => TRUE,
        'em' => TRUE,
        'font' => TRUE,
        'big' => TRUE,
        'del' => TRUE,
        'tt' => TRUE,
        'sub' => TRUE,
        'sup' => TRUE,
        'strike' => TRUE
    );

    /**
     * Creates the describer for a tag: AnchorToString for "a",
     * NoContentTagToString for "img" and TagToString for anything else.
     * Tag names are matched case-insensitively.
     *
     * @param $node TagNode tag to describe
     * @return TagToString
     */
    public function create(TagNode $node) {
        $tagName = $node->qName;
        $semantic = $this->getChangeSemantic($tagName);

        if (strcasecmp($tagName, 'a') == 0) {
            return new AnchorToString($node, $semantic);
        }
        if (strcasecmp($tagName, 'img') == 0) {
            return new NoContentTagToString($node, $semantic);
        }
        return new TagToString($node, $semantic);
    }

    /**
     * Classifies a tag name, ignoring case.
     *
     * @param $qname string tag name
     * @return int self::MOVED, self::STYLE or self::UNKNOWN
     */
    protected function getChangeSemantic($qname) {
        $isContainer = array_key_exists(strtolower($qname), self::$containerTags);
        if ($isContainer) {
            return self::MOVED;
        }
        $isStyle = array_key_exists(strtolower($qname), self::$styleTags);
        return $isStyle ? self::STYLE : self::UNKNOWN;
    }
}
/**
 * Produces human-readable descriptions of a tag being added to or removed
 * from the ancestry of a piece of text.
 */
class TagToString {

    /** The TagNode being described. */
    protected $node;

    /** One of the TagToStringFactory semantic constants (MOVED, STYLE, UNKNOWN). */
    protected $sem;

    /**
     * @param $node TagNode tag to describe
     * @param $sem int change semantic as returned by TagToStringFactory
     */
    function __construct(TagNode $node, $sem) {
        $this->node = $node;
        $this->sem = $sem;
    }

    /**
     * Returns the display name of the tag.
     *
     * The bundle is looked up with the lower-cased tag name; when it has no
     * entry for the tag, the tag name itself is returned so that the
     * description never ends up empty.
     *
     * @return string
     */
    public function getDescription() {
        $description = $this->getString('diff-' . strtolower($this->node->qName));
        if ($description === '') {
            return $this->node->qName;
        }
        return $description;
    }

    /**
     * Appends a sentence describing the removal of this tag to $txt.
     *
     * @param $txt ChangeText receiver of the description
     */
    public function getRemovedDescription(ChangeText $txt) {
        $this->addChangeDescription($txt,
            $this->getMovedOutOf(), $this->getStyleRemoved(), $this->getRemoved());
    }

    /**
     * Appends a sentence describing the addition of this tag to $txt.
     *
     * @param $txt ChangeText receiver of the description
     */
    public function getAddedDescription(ChangeText $txt) {
        $this->addChangeDescription($txt,
            $this->getMovedTo(), $this->getStyleAdded(), $this->getAdded());
    }

    /**
     * Writes the sentence shared by the added and removed descriptions.
     *
     * MOVED tags read "<moved phrase> <article> <b>description</b>", STYLE
     * tags "<b>Description</b> <style phrase>" and all other tags
     * "<b>Description</b> <plain phrase>"; the attributes and a full stop
     * follow in every case.
     *
     * @param $txt ChangeText receiver of the description
     * @param $movedPhrase string phrase used for MOVED tags
     * @param $stylePhrase string phrase used for STYLE tags
     * @param $plainPhrase string phrase used for any other tag
     */
    private function addChangeDescription(ChangeText $txt, $movedPhrase, $stylePhrase, $plainPhrase) {
        if ($this->sem == TagToStringFactory::MOVED) {
            $txt->addText($movedPhrase . ' ' . strtolower($this->getArticle()) . ' ');
            $txt->addHtml('<b>');
            $txt->addText(strtolower($this->getDescription()));
            $txt->addHtml('</b>');
        } else {
            $phrase = ($this->sem == TagToStringFactory::STYLE) ? $stylePhrase : $plainPhrase;
            $txt->addHtml('<b>');
            $txt->addText($this->getDescription());
            $txt->addHtml('</b>');
            $txt->addText(' ' . strtolower($phrase));
        }
        $this->addAttributes($txt, $this->node->attributes);
        $txt->addText('.');
    }

    protected function getMovedTo() {
        return $this->getString('diff-movedto');
    }

    protected function getStyleAdded() {
        return $this->getString('diff-styleadded');
    }

    protected function getAdded() {
        return $this->getString('diff-added');
    }

    protected function getMovedOutOf() {
        return $this->getString('diff-movedoutof');
    }

    protected function getStyleRemoved() {
        return $this->getString('diff-styleremoved');
    }

    protected function getRemoved() {
        return $this->getString('diff-removed');
    }

    /**
     * Appends the attribute list as " with k1 v1, k2 v2 and k3 v3".
     * Nothing is written when there are no attributes.
     *
     * @param $txt ChangeText receiver of the description
     * @param $attributes array attribute name => value
     */
    protected function addAttributes(ChangeText $txt, array $attributes) {
        $count = count($attributes);
        if ($count < 1) {
            return;
        }
        $keys = array_keys($attributes);
        $last = $count - 1;

        $txt->addText(' ' . strtolower($this->getWith()) . ' '
            . $this->translateArgument($keys[0]) . ' '
            . $attributes[$keys[0]]);

        // everything between the first and the last attribute
        for ($i = 1; $i < $last; $i++) {
            $txt->addText(', ' . $this->translateArgument($keys[$i]) . ' '
                . $attributes[$keys[$i]]);
        }

        if ($count > 1) {
            $txt->addText(' '
                . strtolower($this->getAnd())
                . ' '
                . $this->translateArgument($keys[$last]) . ' '
                . $attributes[$keys[$last]]);
        }
    }

    private function getAnd() {
        return $this->getString('diff-and');
    }

    private function getWith() {
        return $this->getString('diff-with');
    }

    /**
     * Returns the display name for an attribute: src, width and height
     * (matched case-insensitively) are translated, any other name is
     * returned unchanged.
     */
    protected function translateArgument($name) {
        if (0 == strcasecmp($name, 'src')) {
            return strtolower($this->getSource());
        }
        if (0 == strcasecmp($name, 'width')) {
            return strtolower($this->getWidth());
        }
        if (0 == strcasecmp($name, 'height')) {
            return strtolower($this->getHeight());
        }
        return $name;
    }

    private function getHeight() {
        return $this->getString('diff-height');
    }

    private function getWidth() {
        return $this->getString('diff-width');
    }

    protected function getSource() {
        return $this->getString('diff-source');
    }

    /**
     * Returns the article ("A"/"An") registered for the tag, or an empty
     * string when the bundle has none for it.
     */
    protected function getArticle() {
        return $this->getString('diff-' . strtolower($this->node->qName) . '-article');
    }

    /** Message key => display text. */
    public static $bundle = array(
        'diff-movedto' => 'Moved to',
        'diff-styleadded' => 'Style added',
        'diff-added' => 'Added',
        'diff-changedto' => 'Changed to',
        'diff-movedoutof' => 'Moved out of',
        'diff-styleremoved' => 'Style removed',
        'diff-removed' => 'Removed',
        'diff-changedfrom' => 'Changed from',
        'diff-source' => 'Source',
        'diff-withdestination' => 'With destination',
        'diff-and' => 'And',
        'diff-with' => 'With',
        'diff-width' => 'Width',
        'diff-height' => 'Height',
        'diff-html-article' => 'A',
        'diff-html' => 'Html page',
        'diff-body-article' => 'A',
        'diff-body' => 'Html document',
        'diff-p-article' => 'A',
        'diff-p' => 'Paragraph',
        'diff-blockquote-article' => 'A',
        'diff-blockquote' => 'Quote',
        'diff-h1-article' => 'A',
        'diff-h1' => 'Heading (level 1)',
        'diff-h2-article' => 'A',
        'diff-h2' => 'Heading (level 2)',
        'diff-h3-article' => 'A',
        'diff-h3' => 'Heading (level 3)',
        'diff-h4-article' => 'A',
        'diff-h4' => 'Heading (level 4)',
        'diff-h5-article' => 'A',
        'diff-h5' => 'Heading (level 5)',
        'diff-pre-article' => 'A',
        'diff-pre' => 'Preformatted block',
        'diff-div-article' => 'A',
        'diff-div' => 'Division',
        'diff-ul-article' => 'An',
        'diff-ul' => 'Unordered list',
        'diff-ol-article' => 'An',
        'diff-ol' => 'Ordered list',
        'diff-li-article' => 'A',
        'diff-li' => 'List item',
        'diff-table-article' => 'A',
        'diff-table' => 'Table',
        'diff-tbody-article' => 'A',
        'diff-tbody' => " Table's content ",
        'diff-tr-article' => 'A',
        'diff-tr' => 'Row',
        'diff-td-article' => 'A',
        'diff-td' => 'Cell',
        'diff-th-article' => 'A',
        'diff-th' => 'Header',
        'diff-br-article' => 'A',
        'diff-br' => 'Break',
        'diff-hr-article' => 'A',
        'diff-hr' => 'Horizontal rule',
        'diff-code-article' => 'A',
        'diff-code' => 'Computer code block',
        'diff-dl-article' => 'A',
        'diff-dl' => 'Definition list',
        'diff-dt-article' => 'A',
        'diff-dt' => 'Definition term',
        'diff-dd-article' => 'A',
        'diff-dd' => 'Definition',
        'diff-input-article' => 'An',
        'diff-input' => 'Input',
        'diff-form-article' => 'A',
        'diff-form' => 'Form',
        'diff-img-article' => 'An',
        'diff-img' => 'Image',
        'diff-span-article' => 'A',
        'diff-span' => 'Span',
        'diff-a-article' => 'A',
        'diff-a' => 'Link',
        'diff-i' => 'Italics',
        'diff-b' => 'Bold',
        'diff-strong' => 'Strong',
        'diff-em' => 'Emphasis',
        'diff-font' => 'Font',
        'diff-big' => 'Big',
        'diff-del' => 'Deleted',
        'diff-tt' => 'Fixed width',
        'diff-sub' => 'Subscript',
        'diff-sup' => 'Superscript',
        'diff-strike' => 'Strikethrough'
    );

    /**
     * Looks up a display text in the bundle.
     *
     * @param $key string message key
     * @return string the text, or an empty string when the key is unknown
     */
    public function getString($key) {
        if (isset(self::$bundle[$key])) {
            return self::$bundle[$key];
        }
        return '';
    }
}
/**
 * Describer for tags without content (used for images): such a tag is
 * reported as "changed from"/"changed to" instead of moved, styled or added.
 */
class NoContentTagToString extends TagToString {

    function __construct(TagNode $node, $sem) {
        parent::__construct($node, $sem);
    }

    /**
     * Appends "<changed to> <article> <b>description</b><attributes>." to $txt.
     *
     * @param $txt ChangeText receiver of the description
     */
    public function getAddedDescription(ChangeText $txt) {
        $this->addChangedDescription($txt, $this->getChangedTo());
    }

    private function getChangedTo() {
        return $this->getString('diff-changedto');
    }

    /**
     * Appends "<changed from> <article> <b>description</b><attributes>." to $txt.
     *
     * @param $txt ChangeText receiver of the description
     */
    public function getRemovedDescription(ChangeText $txt) {
        $this->addChangedDescription($txt, $this->getChangedFrom());
    }

    private function getChangedFrom() {
        return $this->getString('diff-changedfrom');
    }

    /**
     * Writes the sentence shared by both descriptions.
     *
     * @param $txt ChangeText receiver of the description
     * @param $phrase string leading "changed to"/"changed from" phrase
     */
    private function addChangedDescription(ChangeText $txt, $phrase) {
        $txt->addText($phrase . ' ' . strtolower($this->getArticle()) . ' ');
        $txt->addHtml('<b>');
        $txt->addText(strtolower($this->getDescription()));
        $txt->addHtml('</b>');
        $this->addAttributes($txt, $this->node->attributes);
        $txt->addText('.');
    }
}
/**
 * Describer for anchor tags: the link target is reported separately from
 * the remaining attributes.
 */
class AnchorToString extends TagToString {

    function __construct(TagNode $node, $sem) {
        parent::__construct($node, $sem);
    }

    /**
     * Writes " <with destination> <href>" when an href attribute is present,
     * then lets the parent describe all attributes except href.
     *
     * @param $txt ChangeText receiver of the description
     * @param $attributes array attribute name => value
     */
    protected function addAttributes(ChangeText $txt, array $attributes) {
        $hasTarget = array_key_exists('href', $attributes);
        if ($hasTarget) {
            $phrase = strtolower($this->getWithDestination());
            $txt->addText(' ' . $phrase . ' ' . $attributes['href']);
            // the copy passed on must not mention href a second time
            unset($attributes['href']);
        }
        parent::addAttributes($txt, $attributes);
    }

    private function getWithDestination() {
        return $this->getString('diff-withdestination');
    }
}
/**
 * Takes a branch root and creates an HTML file for it.
 *
 * The tree is walked recursively and reported to a content handler through
 * startElement(), endElement() and characters(). Runs of text nodes carrying
 * the same modification are wrapped in a span describing the change.
 */
class HTMLOutput {

    /** String embedded in all generated ids, e.g. "added-<prefix>-<id>". */
    private $prefix;

    /** Content handler receiving the output. */
    private $handler;

    /**
     * @param $prefix string string embedded in all generated ids
     * @param $handler object handler offering startElement($name, $attrs),
     *   endElement($name) and characters($text)
     */
    function __construct($prefix, $handler) {
        $this->prefix = $prefix;
        $this->handler = $handler;
    }

    /**
     * Writes $node and everything below it to the handler.
     *
     * No element is emitted for the node itself when it is an img or body
     * tag; its children are still processed. At most one modification span
     * is open at any time: it is closed before a child tag, before a text
     * node with a different modification (or one that starts a new id), and
     * at the end of the node.
     *
     * @param $node TagNode branch root
     */
    public function parse(TagNode $node) {
        $handler = $this->handler;

        $emitOwnTag = 0 != strcasecmp($node->qName, 'img')
            && 0 != strcasecmp($node->qName, 'body');
        if ($emitOwnTag) {
            $handler->startElement($node->qName, $node->attributes);
        }

        $newStarted = false;
        $remStarted = false;
        $changeStarted = false;
        $changeTXT = '';

        foreach ($node->children as $child) {
            if ($child instanceof TagNode) {
                // a span never stretches across a child element
                if ($newStarted) {
                    $handler->endElement('span');
                    $newStarted = false;
                } else if ($changeStarted) {
                    $handler->endElement('span');
                    $changeStarted = false;
                } else if ($remStarted) {
                    $handler->endElement('span');
                    $remStarted = false;
                }
                $this->parse($child);
            } else if ($child instanceof TextNode) {
                $mod = $child->modification;

                // close the open span when this node does not continue it
                if ($newStarted && ($mod->type != Modification::ADDED || $mod->firstOfID)) {
                    $handler->endElement('span');
                    $newStarted = false;
                } else if ($changeStarted && ($mod->type != Modification::CHANGED
                        || $mod->changes != $changeTXT || $mod->firstOfID)) {
                    $handler->endElement('span');
                    $changeStarted = false;
                } else if ($remStarted && ($mod->type != Modification::REMOVED || $mod->firstOfID)) {
                    $handler->endElement('span');
                    $remStarted = false;
                }

                // no else because a removed part can just be closed and a new
                // part can start
                if (!$newStarted && $mod->type == Modification::ADDED) {
                    $attrs = array('class' => 'diff-html-added');
                    if ($mod->firstOfID) {
                        $attrs['id'] = 'added-' . $this->prefix . '-' . $mod->id;
                    }
                    $this->addAttributes($mod, $attrs);
                    $attrs['onclick'] = 'return tipA(constructToolTipA(this));';
                    $handler->startElement('span', $attrs);
                    $newStarted = true;
                } else if (!$changeStarted && $mod->type == Modification::CHANGED) {
                    $attrs = array('class' => 'diff-html-changed');
                    if ($mod->firstOfID) {
                        $attrs['id'] = 'changed-' . $this->prefix . '-' . $mod->id;
                    }
                    $this->addAttributes($mod, $attrs);
                    $attrs['onclick'] = 'return tipC(constructToolTipC(this));';
                    $handler->startElement('span', $attrs);

                    // tooltip holding the description of the change
                    $handler->startElement('span', array('class' => 'tip'));
                    $handler->characters($mod->changes);
                    $handler->endElement('span');

                    $changeStarted = true;
                    $changeTXT = $mod->changes;
                } else if (!$remStarted && $mod->type == Modification::REMOVED) {
                    $attrs = array('class' => 'diff-html-removed');
                    if ($mod->firstOfID) {
                        $attrs['id'] = 'removed-' . $this->prefix . '-' . $mod->id;
                    }
                    $this->addAttributes($mod, $attrs);
                    $attrs['onclick'] = 'return tipR(constructToolTipR(this));';
                    $handler->startElement('span', $attrs);
                    $remStarted = true;
                }

                if ($child instanceof ImageNode) {
                    $this->writeImage($child);
                } else {
                    $handler->characters($child->text);
                }
            }
        }

        // close whatever span is still open
        if ($newStarted) {
            $handler->endElement('span');
            $newStarted = false;
        } else if ($changeStarted) {
            $handler->endElement('span');
            $changeStarted = false;
        } else if ($remStarted) {
            $handler->endElement('span');
            $remStarted = false;
        }

        if ($emitOwnTag) {
            $handler->endElement($node->qName);
        }
    }

    /**
     * Writes an img element for an image node. Removed and added images get
     * a changeType attribute; every image gets the overlay update hooks.
     *
     * @param $imgNode ImageNode image to write
     */
    private function writeImage(ImageNode $imgNode) {
        $attrs = $imgNode->attributes;
        if ($imgNode->modification->type == Modification::REMOVED) {
            $attrs['changeType'] = 'diff-removed-image';
        } else if ($imgNode->modification->type == Modification::ADDED) {
            $attrs['changeType'] = 'diff-added-image';
        }
        $attrs['onload'] = 'updateOverlays()';
        $attrs['onError'] = 'updateOverlays()';
        $attrs['onAbort'] = 'updateOverlays()';
        $this->handler->startElement('img', $attrs);
        $this->handler->endElement('img');
    }

    /**
     * Adds the navigation attributes of a modification to $attrs:
     * "previous" and "next" name the neighbouring modifications (or
     * "first-<prefix>" / "last-<prefix>" at either end) and "changeId"
     * names the modification itself.
     *
     * @param $mod Modification modification being written
     * @param $attrs array attribute list to extend, passed by reference
     */
    private function addAttributes(Modification $mod, /*array*/ &$attrs) {
        if (is_null($mod->prevMod)) {
            $previous = 'first-' . $this->prefix;
        } else {
            $previous = $this->modificationKey($mod->prevMod);
        }
        $attrs['previous'] = $previous;

        $attrs['changeId'] = $this->modificationKey($mod);

        if (is_null($mod->nextMod)) {
            $next = 'last-' . $this->prefix;
        } else {
            $next = $this->modificationKey($mod->nextMod);
        }
        $attrs['next'] = $next;
    }

    /**
     * Builds "<type>-<prefix>-<id>" for a modification, where <type> is the
     * result of Modification::typeToString().
     */
    private function modificationKey(Modification $mod) {
        return Modification::typeToString($mod->type) . '-' . $this->prefix . '-' . $mod->id;
    }
}
/**
 * Content handler that prints the markup it receives directly with echo.
 */
class EchoingContentHandler {

    /**
     * Prints an opening tag. Attribute values are passed through
     * Sanitizer::encodeAttribute(); tag and attribute names are printed as
     * given.
     *
     * @param $qname string tag name
     * @param $arguments array attribute name => value
     */
    function startElement($qname, /*array*/ $arguments) {
        echo '<', $qname;
        foreach ($arguments as $attrName => $attrValue) {
            echo ' ', $attrName, '="', Sanitizer::encodeAttribute($attrValue), '"';
        }
        echo '>';
    }

    /**
     * Prints a closing tag.
     *
     * @param $qname string tag name
     */
    function endElement($qname) {
        echo '</', $qname, '>';
    }

    /**
     * Prints character data verbatim.
     *
     * @param $chars string text to print
     */
    function characters($chars) {
        echo $chars;
    }
}
/**
 * Content handler that forwards the markup it receives, piece by piece, to
 * the addHtml() method of another object.
 */
class DelegatingContentHandler {

    /** Object whose addHtml() receives all output. */
    private $delegate;

    /**
     * @param $delegate object receiver offering an addHtml($html) method
     */
    function __construct($delegate) {
        $this->delegate = $delegate;
    }

    /**
     * Forwards an opening tag. Attribute values are passed through
     * Sanitizer::encodeAttribute(); tag and attribute names are forwarded as
     * given.
     *
     * @param $qname string tag name
     * @param $arguments array attribute name => value
     */
    function startElement($qname, /*array*/ $arguments) {
        $sink = $this->delegate;
        $sink->addHtml('<' . $qname);
        foreach ($arguments as $attrName => $attrValue) {
            $encoded = Sanitizer::encodeAttribute($attrValue);
            $sink->addHtml(' ' . $attrName . '="' . $encoded . '"');
        }
        $sink->addHtml('>');
    }

    /**
     * Forwards a closing tag.
     *
     * @param $qname string tag name
     */
    function endElement($qname) {
        $closing = '</' . $qname . '>';
        $this->delegate->addHtml($closing);
    }

    /**
     * Forwards character data verbatim.
     *
     * @param $chars string text to forward
     */
    function characters($chars) {
        $this->delegate->addHtml($chars);
    }
}