Test case files and fixes for text-prefetch two-and-a-half-pass dump.
Will want to test a little more before making another dump. :P
This commit is contained in:
parent
e224e31f84
commit
189d24ef2d
5 changed files with 246 additions and 18 deletions
|
|
@ -18,6 +18,7 @@
|
|||
class BaseDump {
|
||||
var $reader = null;
|
||||
var $atEnd = false;
|
||||
var $atPageEnd = false;
|
||||
var $lastPage = 0;
|
||||
var $lastRev = 0;
|
||||
|
||||
|
|
@ -36,27 +37,31 @@ class BaseDump {
|
|||
* @return string or null
|
||||
*/
|
||||
function prefetch( $page, $rev ) {
|
||||
$page = intval( $page );
|
||||
$rev = intval( $rev );
|
||||
while( $this->lastPage < $page && !$this->atEnd ) {
|
||||
$this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
|
||||
$this->nextPage();
|
||||
}
|
||||
if( $this->lastPage > $page || $this->atEnd ) {
|
||||
$this->debug( "BaseDump::prefetch already past page $page looking for rev $rev\n" );
|
||||
$this->debug( "BaseDump::prefetch already past page $page looking for rev $rev [$this->lastPage, $this->lastRev]" );
|
||||
return null;
|
||||
}
|
||||
while( $this->lastRev < $rev && !$this->atEnd ) {
|
||||
while( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
|
||||
$this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, looking for $page, $rev" );
|
||||
$this->nextRev();
|
||||
}
|
||||
if( $this->lastRev == $rev ) {
|
||||
$this->debug( "BaseDump::prefetch hit on $page, $rev\n" );
|
||||
if( $this->lastRev == $rev && !$this->atEnd ) {
|
||||
$this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
|
||||
return $this->nextText();
|
||||
} else {
|
||||
$this->debug( "BaseDump::prefetch already past rev $rev on page $page\n" );
|
||||
$this->debug( "BaseDump::prefetch already past rev $rev on page $page [$this->lastPage, $this->lastRev]" );
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function debug( $str ) {
|
||||
wfDebug( $str );
|
||||
wfDebug( $str . "\n" );
|
||||
//global $dumper;
|
||||
//$dumper->progress( $str );
|
||||
}
|
||||
|
|
@ -65,19 +70,28 @@ class BaseDump {
|
|||
* @access private
|
||||
*/
|
||||
function nextPage() {
|
||||
$this->skipTo( 'page' );
|
||||
$this->skipTo( 'id' );
|
||||
$this->lastPage = intval( $this->nodeContents() );
|
||||
$this->lastRev = 0;
|
||||
if( $this->skipTo( 'page', 'mediawiki' ) ) {
|
||||
if( $this->skipTo( 'id' ) ) {
|
||||
$this->lastPage = intval( $this->nodeContents() );
|
||||
$this->lastRev = 0;
|
||||
$this->atPageEnd = false;
|
||||
}
|
||||
} else {
|
||||
$this->atEnd = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @access private
|
||||
*/
|
||||
function nextRev() {
|
||||
$this->skipTo( 'revision' );
|
||||
$this->skipTo( 'id' );
|
||||
$this->lastRev = intval( $this->nodeContents() );
|
||||
if( $this->skipTo( 'revision' ) ) {
|
||||
if( $this->skipTo( 'id' ) ) {
|
||||
$this->lastRev = intval( $this->nodeContents() );
|
||||
}
|
||||
} else {
|
||||
$this->atPageEnd = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -91,7 +105,7 @@ class BaseDump {
|
|||
/**
|
||||
* @access private
|
||||
*/
|
||||
function skipTo( $name ) {
|
||||
function skipTo( $name, $parent='page' ) {
|
||||
if( $this->atEnd ) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -100,6 +114,11 @@ class BaseDump {
|
|||
$this->reader->name == $name ) {
|
||||
return true;
|
||||
}
|
||||
if( $this->reader->nodeType == XMLREADER_END_ELEMENT &&
|
||||
$this->reader->name == $parent ) {
|
||||
$this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return $this->close();
|
||||
}
|
||||
|
|
@ -113,7 +132,7 @@ class BaseDump {
|
|||
*/
|
||||
function nodeContents() {
|
||||
if( $this->atEnd ) {
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
if( $this->reader->isEmptyElement ) {
|
||||
return "";
|
||||
|
|
@ -139,7 +158,7 @@ class BaseDump {
|
|||
function close() {
|
||||
$this->reader->close();
|
||||
$this->atEnd = true;
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -159,9 +159,11 @@ class TextPassDumper extends BackupDumper {
|
|||
if( $name == 'revision' ) {
|
||||
$this->egress->writeRevision( null, $this->buffer );
|
||||
$this->buffer = "";
|
||||
$this->thisRev = "";
|
||||
} elseif( $name == 'page' ) {
|
||||
$this->egress->writeClosePage( $this->buffer );
|
||||
$this->buffer = "";
|
||||
$this->thisPage = "";
|
||||
} elseif( $name == 'mediawiki' ) {
|
||||
$this->egress->writeCloseStream( $this->buffer );
|
||||
$this->buffer = "";
|
||||
|
|
@ -172,9 +174,9 @@ class TextPassDumper extends BackupDumper {
|
|||
$this->clearOpenElement( null );
|
||||
if( $this->lastName == "id" ) {
|
||||
if( $this->state == "revision" ) {
|
||||
$this->thisRev = intval( $data );
|
||||
$this->thisRev .= $data;
|
||||
} elseif( $this->state == "page" ) {
|
||||
$this->thisPage = intval( $data );
|
||||
$this->thisPage .= $data;
|
||||
}
|
||||
}
|
||||
$this->buffer .= htmlspecialchars( $data );
|
||||
|
|
|
|||
75
tests/test-prefetch-current.xml
Normal file
75
tests/test-prefetch-current.xml
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
|
||||
<siteinfo>
|
||||
<sitename>DemoWiki</sitename>
|
||||
<base>http://example.com/wiki/Main_Page</base>
|
||||
<generator>MediaWiki 1.5.0</generator>
|
||||
<case>first-letter</case>
|
||||
<namespaces>
|
||||
<namespace key="-2">Media</namespace>
|
||||
<namespace key="-1">Special</namespace>
|
||||
<namespace key="0"></namespace>
|
||||
<namespace key="1">Talk</namespace>
|
||||
<namespace key="2">User</namespace>
|
||||
<namespace key="3">User talk</namespace>
|
||||
<namespace key="4">DemoWiki</namespace>
|
||||
<namespace key="5">DemoWiki talk</namespace>
|
||||
<namespace key="6">Image</namespace>
|
||||
<namespace key="7">Image talk</namespace>
|
||||
<namespace key="8">MediaWiki</namespace>
|
||||
<namespace key="9">MediaWiki talk</namespace>
|
||||
<namespace key="10">Template</namespace>
|
||||
<namespace key="11">Template talk</namespace>
|
||||
<namespace key="12">Help</namespace>
|
||||
<namespace key="13">Help talk</namespace>
|
||||
<namespace key="14">Category</namespace>
|
||||
<namespace key="15">Category talk</namespace>
|
||||
</namespaces>
|
||||
</siteinfo>
|
||||
<page>
|
||||
<title>First page</title>
|
||||
<id>1</id>
|
||||
<revision>
|
||||
<id>1</id>
|
||||
<timestamp>2001-01-15T12:00:00Z</timestamp>
|
||||
<contributor><ip>10.0.0.1</ip></contributor>
|
||||
<comment>page 1, rev 1</comment>
|
||||
<text>page 1, rev 1</text>
|
||||
</revision>
|
||||
<revision>
|
||||
<id>2</id>
|
||||
<timestamp>2001-01-15T12:00:00Z</timestamp>
|
||||
<contributor><ip>10.0.0.1</ip></contributor>
|
||||
<comment>page 1, rev 2</comment>
|
||||
<text>page 1, rev 2</text>
|
||||
</revision>
|
||||
<revision>
|
||||
<id>4</id>
|
||||
<timestamp>2001-01-15T12:00:00Z</timestamp>
|
||||
<contributor><ip>10.0.0.1</ip></contributor>
|
||||
<comment>page 1, rev 4</comment>
|
||||
<text>page 1, rev 4</text>
|
||||
</revision>
|
||||
</page>
|
||||
<page>
|
||||
<title>Second page</title>
|
||||
<id>2</id>
|
||||
<revision>
|
||||
<id>3</id>
|
||||
<timestamp>2001-01-15T12:00:00Z</timestamp>
|
||||
<contributor><ip>10.0.0.1</ip></contributor>
|
||||
<comment>page 2, rev 3</comment>
|
||||
<text>page 2, rev 3</text>
|
||||
</revision>
|
||||
</page>
|
||||
<page>
|
||||
<title>Third page</title>
|
||||
<id>3</id>
|
||||
<revision>
|
||||
<id>5</id>
|
||||
<timestamp>2001-01-15T12:00:00Z</timestamp>
|
||||
<contributor><ip>10.0.0.1</ip></contributor>
|
||||
<comment>page 3, rev 5</comment>
|
||||
<text>page 3, rev 5</text>
|
||||
</revision>
|
||||
</page>
|
||||
</mediawiki>
|
||||
57
tests/test-prefetch-previous.xml
Normal file
57
tests/test-prefetch-previous.xml
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
|
||||
<siteinfo>
|
||||
<sitename>DemoWiki</sitename>
|
||||
<base>http://example.com/wiki/Main_Page</base>
|
||||
<generator>MediaWiki 1.5.0</generator>
|
||||
<case>first-letter</case>
|
||||
<namespaces>
|
||||
<namespace key="-2">Media</namespace>
|
||||
<namespace key="-1">Special</namespace>
|
||||
<namespace key="0"></namespace>
|
||||
<namespace key="1">Talk</namespace>
|
||||
<namespace key="2">User</namespace>
|
||||
<namespace key="3">User talk</namespace>
|
||||
<namespace key="4">DemoWiki</namespace>
|
||||
<namespace key="5">DemoWiki talk</namespace>
|
||||
<namespace key="6">Image</namespace>
|
||||
<namespace key="7">Image talk</namespace>
|
||||
<namespace key="8">MediaWiki</namespace>
|
||||
<namespace key="9">MediaWiki talk</namespace>
|
||||
<namespace key="10">Template</namespace>
|
||||
<namespace key="11">Template talk</namespace>
|
||||
<namespace key="12">Help</namespace>
|
||||
<namespace key="13">Help talk</namespace>
|
||||
<namespace key="14">Category</namespace>
|
||||
<namespace key="15">Category talk</namespace>
|
||||
</namespaces>
|
||||
</siteinfo>
|
||||
<page>
|
||||
<title>First page</title>
|
||||
<id>1</id>
|
||||
<revision>
|
||||
<id>1</id>
|
||||
<timestamp>2001-01-15T12:00:00Z</timestamp>
|
||||
<contributor><ip>10.0.0.1</ip></contributor>
|
||||
<comment>page 1, rev 1</comment>
|
||||
<text>page 1, rev 1</text>
|
||||
</revision>
|
||||
<revision>
|
||||
<id>2</id>
|
||||
<timestamp>2001-01-15T12:00:00Z</timestamp>
|
||||
<contributor><ip>10.0.0.1</ip></contributor>
|
||||
<comment>page 1, rev 2</comment>
|
||||
<text>page 1, rev 2</text>
|
||||
</revision>
|
||||
</page>
|
||||
<page>
|
||||
<title>Second page</title>
|
||||
<id>2</id>
|
||||
<revision>
|
||||
<id>3</id>
|
||||
<timestamp>2001-01-15T12:00:00Z</timestamp>
|
||||
<contributor><ip>10.0.0.1</ip></contributor>
|
||||
<comment>page 2, rev 3</comment>
|
||||
<text>page 2, rev 3</text>
|
||||
</revision>
|
||||
</page>
|
||||
</mediawiki>
|
||||
75
tests/test-prefetch-stub.xml
Normal file
75
tests/test-prefetch-stub.xml
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
|
||||
<siteinfo>
|
||||
<sitename>DemoWiki</sitename>
|
||||
<base>http://example.com/wiki/Main_Page</base>
|
||||
<generator>MediaWiki 1.5.0</generator>
|
||||
<case>first-letter</case>
|
||||
<namespaces>
|
||||
<namespace key="-2">Media</namespace>
|
||||
<namespace key="-1">Special</namespace>
|
||||
<namespace key="0"></namespace>
|
||||
<namespace key="1">Talk</namespace>
|
||||
<namespace key="2">User</namespace>
|
||||
<namespace key="3">User talk</namespace>
|
||||
<namespace key="4">DemoWiki</namespace>
|
||||
<namespace key="5">DemoWiki talk</namespace>
|
||||
<namespace key="6">Image</namespace>
|
||||
<namespace key="7">Image talk</namespace>
|
||||
<namespace key="8">MediaWiki</namespace>
|
||||
<namespace key="9">MediaWiki talk</namespace>
|
||||
<namespace key="10">Template</namespace>
|
||||
<namespace key="11">Template talk</namespace>
|
||||
<namespace key="12">Help</namespace>
|
||||
<namespace key="13">Help talk</namespace>
|
||||
<namespace key="14">Category</namespace>
|
||||
<namespace key="15">Category talk</namespace>
|
||||
</namespaces>
|
||||
</siteinfo>
|
||||
<page>
|
||||
<title>First page</title>
|
||||
<id>1</id>
|
||||
<revision>
|
||||
<id>1</id>
|
||||
<timestamp>2001-01-15T12:00:00Z</timestamp>
|
||||
<contributor><ip>10.0.0.1</ip></contributor>
|
||||
<comment>page 1, rev 1</comment>
|
||||
<text id="1" />
|
||||
</revision>
|
||||
<revision>
|
||||
<id>2</id>
|
||||
<timestamp>2001-01-15T12:00:00Z</timestamp>
|
||||
<contributor><ip>10.0.0.1</ip></contributor>
|
||||
<comment>page 1, rev 2</comment>
|
||||
<text id="2" />
|
||||
</revision>
|
||||
<revision>
|
||||
<id>4</id>
|
||||
<timestamp>2001-01-15T12:00:00Z</timestamp>
|
||||
<contributor><ip>10.0.0.1</ip></contributor>
|
||||
<comment>page 1, rev 4</comment>
|
||||
<text id="4" />
|
||||
</revision>
|
||||
</page>
|
||||
<page>
|
||||
<title>Second page</title>
|
||||
<id>2</id>
|
||||
<revision>
|
||||
<id>3</id>
|
||||
<timestamp>2001-01-15T12:00:00Z</timestamp>
|
||||
<contributor><ip>10.0.0.1</ip></contributor>
|
||||
<comment>page 2, rev 3</comment>
|
||||
<text id="3" />
|
||||
</revision>
|
||||
</page>
|
||||
<page>
|
||||
<title>Third page</title>
|
||||
<id>3</id>
|
||||
<revision>
|
||||
<id>5</id>
|
||||
<timestamp>2001-01-15T12:00:00Z</timestamp>
|
||||
<contributor><ip>10.0.0.1</ip></contributor>
|
||||
<comment>page 3, rev 5</comment>
|
||||
<text id="5" />
|
||||
</revision>
|
||||
</page>
|
||||
</mediawiki>
|
||||
Loading…
Reference in a new issue