2008-02-06 01:23:12 +00:00
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
/**
 * Checks whether a file is well-formed XML and records the name of its
 * root element. The check runs immediately from the constructor; read
 * $wellFormed and $rootElement (or getRootElement()) afterwards.
 */
class XmlTypeCheck {
	/**
	 * Will be set to true or false to indicate whether the file is
	 * well-formed XML. Note that this doesn't check schema validity.
	 * Stays false when the file cannot be opened or read.
	 */
	public $wellFormed = false;

	/**
	 * Name of the document's root element, including any namespace
	 * as an expanded URL.
	 */
	public $rootElement = '';

	/**
	 * Whether use of undeclared XML namespaces is tolerated
	 * (see the constructor's $softNamespaces parameter).
	 */
	private $softNamespaces;

	/**
	 * Namespace declarations collected from the root element in soft
	 * mode: prefix => URI, with '' as the key of the default namespace.
	 */
	private $namespaces = array();

	/**
	 * @param $file string filename
	 * @param $softNamespaces bool
	 *        If set to true, use of undeclared XML namespaces will be ignored.
	 *        This matches the behavior of rsvg, but more compliant consumers
	 *        such as Firefox will reject such files.
	 *        Leave off for the default, stricter checks.
	 */
	function __construct( $file, $softNamespaces=false ) {
		$this->softNamespaces = $softNamespaces;
		$this->run( $file );
	}

	/**
	 * Get the root element. Simple accessor to $rootElement
	 */
	public function getRootElement() {
		return $this->rootElement;
	}

	/**
	 * Feed the file to an XML parser in chunks and record the outcome.
	 *
	 * Sets $wellFormed to true only if the whole file was read and parsed
	 * without error; $rootElement is filled in by elementOpen() as soon as
	 * the first start tag is seen.
	 *
	 * @param $fname string filename
	 */
	private function run( $fname ) {
		$file = fopen( $fname, "rb" );
		if( $file === false ) {
			// Nothing to parse. Without this guard fread()/feof() would be
			// handed a non-resource, which either throws or never reports
			// end-of-file and so loops forever.
			return;
		}

		if( $this->softNamespaces ) {
			// Namespace-unaware parser; prefixes are resolved by hand
			// in elementOpen().
			$parser = xml_parser_create( 'UTF-8' );
		} else {
			$parser = xml_parser_create_ns( 'UTF-8' );
		}

		// case folding violates XML standard, turn it off
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		xml_set_element_handler( $parser, array( $this, 'elementOpen' ), false );

		$ok = true;
		do {
			$chunk = fread( $file, 32768 );
			if( $chunk === false ) {
				// Read error: treat the file as not verifiable rather than
				// passing a bogus chunk to the parser.
				$ok = false;
				break;
			}
			// The third argument tells the parser whether this is the
			// final chunk of the document.
			$ret = xml_parse( $parser, $chunk, feof( $file ) );
			if( !$ret ) {
				// XML isn't well-formed!
				$ok = false;
				break;
			}
		} while( !feof( $file ) );

		$this->wellFormed = $ok;

		fclose( $file );
		xml_parser_free( $parser );
	}

	/**
	 * Start-element callback for the XML parser. Records the name of the
	 * first element encountered as the root element, then unregisters
	 * itself so later elements are ignored.
	 *
	 * @param $parser resource the parser invoking the callback
	 * @param $name string element name as reported by the parser
	 * @param $attribs array attribute name => value
	 */
	private function elementOpen( $parser, $name, $attribs ) {
		if( $this->softNamespaces ) {
			// Check namespaces manually, so expat doesn't throw
			// errors on use of undeclared namespaces.
			foreach( $attribs as $attrib => $val ) {
				if( $attrib == 'xmlns' ) {
					$this->namespaces[''] = $val;
				} elseif( substr( $attrib, 0, strlen( 'xmlns:' ) ) == 'xmlns:' ) {
					$this->namespaces[substr( $attrib, strlen( 'xmlns:' ) )] = $val;
				}
			}

			if( strpos( $name, ':' ) === false ) {
				$ns = '';
				$subname = $name;
			} else {
				list( $ns, $subname ) = explode( ':', $name, 2 );
			}

			if( isset( $this->namespaces[$ns] ) ) {
				// Expand the prefix to its URI, giving "URI:localname".
				$name = $this->namespaces[$ns] . ':' . $subname;
			} else {
				// Technically this is invalid for XML with Namespaces.
				// But..... we'll just let it slide in soft mode.
			}
		}

		// We only need the first open element
		$this->rootElement = $name;
		xml_set_element_handler( $parser, false, false );
	}
}
|