Test case files and fixes for text-prefetch two-and-a-half-pass dump.

Will want to test a little more before making another dump. :P
This commit is contained in:
Brion Vibber 2005-10-19 00:05:22 +00:00
parent e224e31f84
commit 189d24ef2d
5 changed files with 246 additions and 18 deletions

View file

@ -18,6 +18,7 @@
class BaseDump {
var $reader = null;
var $atEnd = false;
var $atPageEnd = false;
var $lastPage = 0;
var $lastRev = 0;
@ -36,27 +37,31 @@ class BaseDump {
* @return string or null
*/
function prefetch( $page, $rev ) {
$page = intval( $page );
$rev = intval( $rev );
while( $this->lastPage < $page && !$this->atEnd ) {
$this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
$this->nextPage();
}
if( $this->lastPage > $page || $this->atEnd ) {
$this->debug( "BaseDump::prefetch already past page $page looking for rev $rev\n" );
$this->debug( "BaseDump::prefetch already past page $page looking for rev $rev [$this->lastPage, $this->lastRev]" );
return null;
}
while( $this->lastRev < $rev && !$this->atEnd ) {
while( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
$this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, looking for $page, $rev" );
$this->nextRev();
}
if( $this->lastRev == $rev ) {
$this->debug( "BaseDump::prefetch hit on $page, $rev\n" );
if( $this->lastRev == $rev && !$this->atEnd ) {
$this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
return $this->nextText();
} else {
$this->debug( "BaseDump::prefetch already past rev $rev on page $page\n" );
$this->debug( "BaseDump::prefetch already past rev $rev on page $page [$this->lastPage, $this->lastRev]" );
return null;
}
}
function debug( $str ) {
wfDebug( $str );
wfDebug( $str . "\n" );
//global $dumper;
//$dumper->progress( $str );
}
@ -65,19 +70,28 @@ class BaseDump {
* @access private
*/
function nextPage() {
$this->skipTo( 'page' );
$this->skipTo( 'id' );
$this->lastPage = intval( $this->nodeContents() );
$this->lastRev = 0;
if( $this->skipTo( 'page', 'mediawiki' ) ) {
if( $this->skipTo( 'id' ) ) {
$this->lastPage = intval( $this->nodeContents() );
$this->lastRev = 0;
$this->atPageEnd = false;
}
} else {
$this->atEnd = true;
}
}
/**
* @access private
*/
function nextRev() {
$this->skipTo( 'revision' );
$this->skipTo( 'id' );
$this->lastRev = intval( $this->nodeContents() );
if( $this->skipTo( 'revision' ) ) {
if( $this->skipTo( 'id' ) ) {
$this->lastRev = intval( $this->nodeContents() );
}
} else {
$this->atPageEnd = true;
}
}
/**
@ -91,7 +105,7 @@ class BaseDump {
/**
* @access private
*/
function skipTo( $name ) {
function skipTo( $name, $parent='page' ) {
if( $this->atEnd ) {
return false;
}
@ -100,6 +114,11 @@ class BaseDump {
$this->reader->name == $name ) {
return true;
}
if( $this->reader->nodeType == XMLREADER_END_ELEMENT &&
$this->reader->name == $parent ) {
$this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );
return false;
}
}
return $this->close();
}
@ -113,7 +132,7 @@ class BaseDump {
*/
function nodeContents() {
if( $this->atEnd ) {
return false;
return null;
}
if( $this->reader->isEmptyElement ) {
return "";
@ -139,7 +158,7 @@ class BaseDump {
function close() {
$this->reader->close();
$this->atEnd = true;
return false;
return null;
}
}

View file

@ -159,9 +159,11 @@ class TextPassDumper extends BackupDumper {
if( $name == 'revision' ) {
$this->egress->writeRevision( null, $this->buffer );
$this->buffer = "";
$this->thisRev = "";
} elseif( $name == 'page' ) {
$this->egress->writeClosePage( $this->buffer );
$this->buffer = "";
$this->thisPage = "";
} elseif( $name == 'mediawiki' ) {
$this->egress->writeCloseStream( $this->buffer );
$this->buffer = "";
@ -172,9 +174,9 @@ class TextPassDumper extends BackupDumper {
$this->clearOpenElement( null );
if( $this->lastName == "id" ) {
if( $this->state == "revision" ) {
$this->thisRev = intval( $data );
$this->thisRev .= $data;
} elseif( $this->state == "page" ) {
$this->thisPage = intval( $data );
$this->thisPage .= $data;
}
}
$this->buffer .= htmlspecialchars( $data );

View file

@ -0,0 +1,75 @@
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
<siteinfo>
<sitename>DemoWiki</sitename>
<base>http://example.com/wiki/Main_Page</base>
<generator>MediaWiki 1.5.0</generator>
<case>first-letter</case>
<namespaces>
<namespace key="-2">Media</namespace>
<namespace key="-1">Special</namespace>
<namespace key="0"></namespace>
<namespace key="1">Talk</namespace>
<namespace key="2">User</namespace>
<namespace key="3">User talk</namespace>
<namespace key="4">DemoWiki</namespace>
<namespace key="5">DemoWiki talk</namespace>
<namespace key="6">Image</namespace>
<namespace key="7">Image talk</namespace>
<namespace key="8">MediaWiki</namespace>
<namespace key="9">MediaWiki talk</namespace>
<namespace key="10">Template</namespace>
<namespace key="11">Template talk</namespace>
<namespace key="12">Help</namespace>
<namespace key="13">Help talk</namespace>
<namespace key="14">Category</namespace>
<namespace key="15">Category talk</namespace>
</namespaces>
</siteinfo>
<page>
<title>First page</title>
<id>1</id>
<revision>
<id>1</id>
<timestamp>2001-01-15T12:00:00Z</timestamp>
<contributor><ip>10.0.0.1</ip></contributor>
<comment>page 1, rev 1</comment>
<text>page 1, rev 1</text>
</revision>
<revision>
<id>2</id>
<timestamp>2001-01-15T12:00:00Z</timestamp>
<contributor><ip>10.0.0.1</ip></contributor>
<comment>page 1, rev 2</comment>
<text>page 1, rev 2</text>
</revision>
<revision>
<id>4</id>
<timestamp>2001-01-15T12:00:00Z</timestamp>
<contributor><ip>10.0.0.1</ip></contributor>
<comment>page 1, rev 4</comment>
<text>page 1, rev 4</text>
</revision>
</page>
<page>
<title>Second page</title>
<id>2</id>
<revision>
<id>3</id>
<timestamp>2001-01-15T12:00:00Z</timestamp>
<contributor><ip>10.0.0.1</ip></contributor>
<comment>page 2, rev 3</comment>
<text>page 2, rev 3</text>
</revision>
</page>
<page>
<title>Third page</title>
<id>3</id>
<revision>
<id>5</id>
<timestamp>2001-01-15T12:00:00Z</timestamp>
<contributor><ip>10.0.0.1</ip></contributor>
<comment>page 3, rev 5</comment>
<text>page 3, rev 5</text>
</revision>
</page>
</mediawiki>

View file

@ -0,0 +1,57 @@
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
<siteinfo>
<sitename>DemoWiki</sitename>
<base>http://example.com/wiki/Main_Page</base>
<generator>MediaWiki 1.5.0</generator>
<case>first-letter</case>
<namespaces>
<namespace key="-2">Media</namespace>
<namespace key="-1">Special</namespace>
<namespace key="0"></namespace>
<namespace key="1">Talk</namespace>
<namespace key="2">User</namespace>
<namespace key="3">User talk</namespace>
<namespace key="4">DemoWiki</namespace>
<namespace key="5">DemoWiki talk</namespace>
<namespace key="6">Image</namespace>
<namespace key="7">Image talk</namespace>
<namespace key="8">MediaWiki</namespace>
<namespace key="9">MediaWiki talk</namespace>
<namespace key="10">Template</namespace>
<namespace key="11">Template talk</namespace>
<namespace key="12">Help</namespace>
<namespace key="13">Help talk</namespace>
<namespace key="14">Category</namespace>
<namespace key="15">Category talk</namespace>
</namespaces>
</siteinfo>
<page>
<title>First page</title>
<id>1</id>
<revision>
<id>1</id>
<timestamp>2001-01-15T12:00:00Z</timestamp>
<contributor><ip>10.0.0.1</ip></contributor>
<comment>page 1, rev 1</comment>
<text>page 1, rev 1</text>
</revision>
<revision>
<id>2</id>
<timestamp>2001-01-15T12:00:00Z</timestamp>
<contributor><ip>10.0.0.1</ip></contributor>
<comment>page 1, rev 2</comment>
<text>page 1, rev 2</text>
</revision>
</page>
<page>
<title>Second page</title>
<id>2</id>
<revision>
<id>3</id>
<timestamp>2001-01-15T12:00:00Z</timestamp>
<contributor><ip>10.0.0.1</ip></contributor>
<comment>page 2, rev 3</comment>
<text>page 2, rev 3</text>
</revision>
</page>
</mediawiki>

View file

@ -0,0 +1,75 @@
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
<siteinfo>
<sitename>DemoWiki</sitename>
<base>http://example.com/wiki/Main_Page</base>
<generator>MediaWiki 1.5.0</generator>
<case>first-letter</case>
<namespaces>
<namespace key="-2">Media</namespace>
<namespace key="-1">Special</namespace>
<namespace key="0"></namespace>
<namespace key="1">Talk</namespace>
<namespace key="2">User</namespace>
<namespace key="3">User talk</namespace>
<namespace key="4">DemoWiki</namespace>
<namespace key="5">DemoWiki talk</namespace>
<namespace key="6">Image</namespace>
<namespace key="7">Image talk</namespace>
<namespace key="8">MediaWiki</namespace>
<namespace key="9">MediaWiki talk</namespace>
<namespace key="10">Template</namespace>
<namespace key="11">Template talk</namespace>
<namespace key="12">Help</namespace>
<namespace key="13">Help talk</namespace>
<namespace key="14">Category</namespace>
<namespace key="15">Category talk</namespace>
</namespaces>
</siteinfo>
<page>
<title>First page</title>
<id>1</id>
<revision>
<id>1</id>
<timestamp>2001-01-15T12:00:00Z</timestamp>
<contributor><ip>10.0.0.1</ip></contributor>
<comment>page 1, rev 1</comment>
<text id="1" />
</revision>
<revision>
<id>2</id>
<timestamp>2001-01-15T12:00:00Z</timestamp>
<contributor><ip>10.0.0.1</ip></contributor>
<comment>page 1, rev 2</comment>
<text id="2" />
</revision>
<revision>
<id>4</id>
<timestamp>2001-01-15T12:00:00Z</timestamp>
<contributor><ip>10.0.0.1</ip></contributor>
<comment>page 1, rev 4</comment>
<text id="4" />
</revision>
</page>
<page>
<title>Second page</title>
<id>2</id>
<revision>
<id>3</id>
<timestamp>2001-01-15T12:00:00Z</timestamp>
<contributor><ip>10.0.0.1</ip></contributor>
<comment>page 2, rev 3</comment>
<text id="3" />
</revision>
</page>
<page>
<title>Third page</title>
<id>3</id>
<revision>
<id>5</id>
<timestamp>2001-01-15T12:00:00Z</timestamp>
<contributor><ip>10.0.0.1</ip></contributor>
<comment>page 3, rev 5</comment>
<text id="5" />
</revision>
</page>
</mediawiki>