From de0c4819d106c10a135387abbe5584952a2658bc Mon Sep 17 00:00:00 2001 From: mainframe98 Date: Fri, 11 Feb 2022 16:03:28 +0100 Subject: [PATCH] Add validation for abstract schema This adds an option to the schema generating maintenance scripts to validate abstract schemas and schema changes and a structure test to validate exisiting schemas and schema changes. Schemas are also validated when generating. The validation for the schema doesn't impose limits on table, index or column names as I couldn't find any reliable conventions for them. The structure tests only cover MediaWiki itself as there is no convention on where extensions store their abstract schema. Ideally, auto detection would be possible for sql/, but for now extensions have to define their own (thankfully trivial) tests. A couple of invalid definitions were fixed thanks to these tests. I aimed to be thorough, but not all parts of the abstract schema are completely clear, and Doctrine's documentation is not complete. As a result, not everything has a description field. Bug: T298320 Change-Id: I681d265317d4d1584869142ebb23d4098c06885f --- docs/abstract-schema-changes.schema.json | 22 ++ docs/abstract-schema-table.json | 208 ++++++++++++++++++ docs/abstract-schema.schema.json | 10 + includes/db/AbstractSchemaValidationError.php | 31 +++ includes/db/AbstractSchemaValidator.php | 117 ++++++++++ maintenance/includes/SchemaMaintenance.php | 22 ++ maintenance/tables.json | 1 - tests/phpunit/data/db/notschema.txt | 1 + tests/phpunit/data/db/tables.json | 6 +- .../AbstractSchemaValidationTest.php | 81 +++++++ .../db/AbstractSchemaValidatorTest.php | 66 ++++++ 11 files changed, 561 insertions(+), 4 deletions(-) create mode 100644 docs/abstract-schema-changes.schema.json create mode 100644 docs/abstract-schema-table.json create mode 100644 docs/abstract-schema.schema.json create mode 100644 includes/db/AbstractSchemaValidationError.php create mode 100644 includes/db/AbstractSchemaValidator.php create mode 100644 tests/phpunit/data/db/notschema.txt create mode 100644 tests/phpunit/structure/AbstractSchemaValidationTest.php create mode 100644 tests/phpunit/unit/includes/db/AbstractSchemaValidatorTest.php diff --git a/docs/abstract-schema-changes.schema.json b/docs/abstract-schema-changes.schema.json new file mode 100644 index 00000000000..bef3cf1c5b5 --- /dev/null +++ b/docs/abstract-schema-changes.schema.json @@ -0,0 +1,22 @@ +{ + "$schema": "https://json-schema.org/schema#", + "description": "MediaWiki abstract database schema schema", + "type": "object", + "additionalProperties": false, + "properties": { + "comment": { + "type": "string", + "description": "Comment describing the schema change" + }, + "before": { + "type": "object", + "description": "Schema before the change", + "$ref": "abstract-schema-table.json" + }, + "after": { + "type": "object", + "description": "Schema after the change", + "$ref": "abstract-schema-table.json" + } + } +} diff --git a/docs/abstract-schema-table.json b/docs/abstract-schema-table.json new file mode 100644 index 00000000000..b5514f36960 --- /dev/null +++ b/docs/abstract-schema-table.json @@ -0,0 +1,208 @@ +{ + "$schema": "https://json-schema.org/schema#", + "description": "Abstract description of a mediawiki database table", + "type": "object", + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Name of the table" + }, + "comment": { + "type": "string", + "description": "Comment describing the table" + }, + "columns": { + "type": "array", + "additionalItems": false, + "description": "Columns", + "minItems": 1, + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Name of the column" + }, + "comment": { + "type": "string", + "description": "Comment describing the column" + }, + "type": { + "type": "string", + "description": "Data type of the column", + "enum": [ + "bigint", + "binary", + "blob", + "datetimetz", + "float", + "integer", + "mwenum", + "mwtimestamp", + "mwtinyint", + "smallint", + "string", + "text" + ] + }, + "options": { + "type": "object", + "description": "Additional options", + "additionalProperties": false, + "properties": { + "autoincrement": { + "type": "boolean", + "description": "Indicates if the field should use an autoincremented value if no value was provided", + "default": false + }, + "default": { + "type": [ + "number", + "string", + "null" + ], + "description": "The default value of the column if no value was specified", + "default": null + }, + "fixed": { + "type": "boolean", + "description": "Indicates if the column should have a fixed length", + "default": false + }, + "length": { + "type": "number", + "description": "Length of the field.", + "default": null, + "minimum": 0 + }, + "notnull": { + "type": "boolean", + "description": "Indicates whether the column is nullable or not", + "default": true + }, + "unsigned": { + "type": "boolean", + "description": "If the column should be an unsigned integer", + "default": false + }, + "PlatformOptions": { + "type": "object", + "additionalProperties": false, + "properties": { + "version": { + "type": "boolean" + } + } + }, + "CustomSchemaOptions": { + "type": "object", + "description": "Custom schema options", + "additionalProperties": false, + "properties": { + "allowInfinite": { + "type": "boolean" + }, + "doublePrecision": { + "type": "boolean" + }, + "enum_values": { + "type": "array", + "description": "Values to use with type 'mwenum'", + "additionalItems": false, + "items": { + "type": "string" + }, + "uniqueItems": true + } + } + } + } + } + }, + "required": [ + "name", + "type" + ] + } + }, + "indexes": { + "type": "array", + "additionalItems": false, + "description": "Indexes", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "description": "Index name" + }, + "comment": { + "type": "string", + "description": "Comment describing the index" + }, + "columns": { + "type": "array", + "additionalItems": false, + "description": "Columns used by the index", + "items": { + "type": "string" + }, + "uniqueItems": true + }, + "unique": { + "type": "boolean", + "description": "If the index is unique" + }, + "flags": { + "type": "array", + "items": { + "type": "string" + } + }, + "options": { + "type": "object", + "properties": { + "lengths": { + "type": "array", + "items": { + "type": [ + "number", + "null" + ] + }, + "minItems": 1 + } + } + } + }, + "required": [ + "name", + "columns" + ] + } + }, + "pk": { + "type": "array", + "additionalItems": false, + "description": "Array of column names used in the primary key", + "items": { + "type": "string" + }, + "uniqueItems": true + }, + "table_options": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "name", + "columns", + "indexes" + ] +} diff --git a/docs/abstract-schema.schema.json b/docs/abstract-schema.schema.json new file mode 100644 index 00000000000..17c8b7ad81a --- /dev/null +++ b/docs/abstract-schema.schema.json @@ -0,0 +1,10 @@ +{ + "$schema": "https://json-schema.org/schema#", + "description": "MediaWiki abstract database schema schema", + "type": "array", + "additionalItems": false, + "items": { + "$ref": "abstract-schema-table.json" + }, + "minItems": 1 +} diff --git a/includes/db/AbstractSchemaValidationError.php b/includes/db/AbstractSchemaValidationError.php new file mode 100644 index 00000000000..10028959ee7 --- /dev/null +++ b/includes/db/AbstractSchemaValidationError.php @@ -0,0 +1,31 @@ +missingDepCallback = $missingDepCallback; + } + + /** + * @codeCoverageIgnore + * @return bool + */ + public function checkDependencies(): bool { + if ( !class_exists( Validator::class ) ) { + ( $this->missingDepCallback )( + 'The JsonSchema library cannot be found, please install it through composer.' + ); + return false; + } + + if ( !class_exists( JsonParser::class ) ) { + ( $this->missingDepCallback )( + 'The JSON lint library cannot be found, please install it through composer.' + ); + return false; + } + + return true; + } + + /** + * @param string $path file to validate + * @return bool true if passes validation + * @throws AbstractSchemaValidationError on any failure + */ + public function validate( string $path ): bool { + $contents = file_get_contents( $path ); + $jsonParser = new JsonParser(); + try { + $data = $jsonParser->parse( $contents, JsonParser::DETECT_KEY_CONFLICTS ); + } catch ( DuplicateKeyException $e ) { + throw new AbstractSchemaValidationError( $e->getMessage(), $e->getCode(), $e ); + } catch ( ParsingException $e ) { + throw new AbstractSchemaValidationError( "$path is not valid JSON", $e->getCode(), $e ); + } + + // Regular schema's are arrays, schema changes are objects. + if ( is_array( $data ) ) { + $schemaPath = __DIR__ . '/../../docs/abstract-schema.schema.json'; + } elseif ( is_object( $data ) ) { + $schemaPath = __DIR__ . '/../../docs/abstract-schema-changes.schema.json'; + } else { + throw new AbstractSchemaValidationError( "$path is not a supported JSON object" ); + } + + $validator = new Validator; + $validator->check( $data, (object)[ '$ref' => 'file://' . $schemaPath ] ); + if ( $validator->isValid() ) { + // All good. + return true; + } + + $out = "$path did not pass validation.\n"; + foreach ( $validator->getErrors() as $error ) { + $out .= "[{$error['property']}] {$error['message']}\n"; + } + throw new AbstractSchemaValidationError( $out ); + } +} diff --git a/maintenance/includes/SchemaMaintenance.php b/maintenance/includes/SchemaMaintenance.php index 1b18bf14aed..eff07a1e70c 100644 --- a/maintenance/includes/SchemaMaintenance.php +++ b/maintenance/includes/SchemaMaintenance.php @@ -22,6 +22,9 @@ * @ingroup Maintenance */ +use MediaWiki\DB\AbstractSchemaValidationError; +use MediaWiki\DB\AbstractSchemaValidator; + require_once __DIR__ . '/../Maintenance.php'; abstract class SchemaMaintenance extends Maintenance { @@ -64,6 +67,10 @@ abstract class SchemaMaintenance extends Maintenance { false, true ); + $this->addOption( + 'validate', + 'Validate the schema instead of generating sql files.' + ); } public function execute() { @@ -80,6 +87,12 @@ abstract class SchemaMaintenance extends Maintenance { $jsonPath = strtr( $jsonPath, '\\', '/' ); } + if ( $this->hasOption( 'validate' ) ) { + $this->getSchema( $jsonPath ); + + return; + } + // Allow to specify a folder and use a default name if ( is_dir( $jsonPath ) ) { $jsonPath .= '/tables.json'; @@ -205,6 +218,15 @@ abstract class SchemaMaintenance extends Maintenance { ); } + $validator = new AbstractSchemaValidator( function ( string $msg ): void { + $this->fatalError( $msg ); + } ); + try { + $validator->validate( $jsonPath ); + } catch ( AbstractSchemaValidationError $e ) { + $this->fatalError( $e->getMessage() ); + } + return $abstractSchema; } } diff --git a/maintenance/tables.json b/maintenance/tables.json index 34d27cc9dca..8a468067170 100644 --- a/maintenance/tables.json +++ b/maintenance/tables.json @@ -3699,7 +3699,6 @@ { "name": "si_text", "columns": [ "si_text" ], - "fulltext": true, "unique": false, "flags": [ "fulltext" diff --git a/tests/phpunit/data/db/notschema.txt b/tests/phpunit/data/db/notschema.txt new file mode 100644 index 00000000000..4bfdc14b82b --- /dev/null +++ b/tests/phpunit/data/db/notschema.txt @@ -0,0 +1 @@ +This is definitely not JSON and most certainly not an abstract schema. diff --git a/tests/phpunit/data/db/tables.json b/tests/phpunit/data/db/tables.json index 679c63b8010..f460d407c6a 100644 --- a/tests/phpunit/data/db/tables.json +++ b/tests/phpunit/data/db/tables.json @@ -7,19 +7,19 @@ "name": "actor_id", "comment": "Unique ID to identify each actor", "type": "bigint", - "options": { "Unsigned": true, "Notnull": true } + "options": { "unsigned": true, "notnull": true } }, { "name": "actor_user", "comment": "Key to user.user_id, or NULL for anonymous edits", "type": "integer", - "options": { "Unsigned": true } + "options": { "unsigned": true } }, { "name": "actor_name", "comment": "Text username or IP address", "type": "string", - "options": { "Length": 255, "Notnull": true } + "options": { "length": 255, "notnull": true } } ], "indexes": [ diff --git a/tests/phpunit/structure/AbstractSchemaValidationTest.php b/tests/phpunit/structure/AbstractSchemaValidationTest.php new file mode 100644 index 00000000000..20c1323fa0e --- /dev/null +++ b/tests/phpunit/structure/AbstractSchemaValidationTest.php @@ -0,0 +1,81 @@ +validator = new AbstractSchemaValidator( [ $this, 'markTestSkipped' ] ); + $this->validator->checkDependencies(); + } + + public static function provideSchemas(): array { + return [ + 'maintenance/tables.json' => [ __DIR__ . '/../../../maintenance/tables.json' ] + ]; + } + + /** + * @dataProvider provideSchemas + * @param string $path Path to tables.json file + */ + public function testSchemasPassValidation( string $path ): void { + try { + $this->validator->validate( $path ); + // All good + $this->assertTrue( true ); + } catch ( AbstractSchemaValidationError $e ) { + $this->fail( $e->getMessage() ); + } + } + + public static function provideSchemaChanges(): Generator { + foreach ( glob( __DIR__ . '/../../../maintenance/abstractSchemaChanges/*.json' ) as $schemaChange ) { + $fileName = pathinfo( $schemaChange, PATHINFO_BASENAME ); + + yield $fileName => [ $schemaChange ]; + } + } + + /** + * @dataProvider provideSchemaChanges + * @param string $path Path to tables.json file + */ + public function testSchemaChangesPassValidation( string $path ): void { + try { + $this->validator->validate( $path ); + // All good + $this->assertTrue( true ); + } catch ( AbstractSchemaValidationError $e ) { + $this->fail( $e->getMessage() ); + } + } +} diff --git a/tests/phpunit/unit/includes/db/AbstractSchemaValidatorTest.php b/tests/phpunit/unit/includes/db/AbstractSchemaValidatorTest.php new file mode 100644 index 00000000000..d4d3de5e824 --- /dev/null +++ b/tests/phpunit/unit/includes/db/AbstractSchemaValidatorTest.php @@ -0,0 +1,66 @@ +markTestSkipped( $msg ); + } ); + + if ( is_string( $expected ) ) { + $this->expectException( AbstractSchemaValidationError::class ); + $this->expectExceptionMessage( $expected ); + } + + $dir = __DIR__ . '/../../../data/db/'; + $this->assertSame( + $expected, + $validator->validate( $dir . $file ) + ); + } + + public static function provideValidate(): array { + return [ + [ + 'tables.json', + true + ], + [ + 'patch-drop-ct_tag.json', + true + ], + [ + 'notschema.txt', + 'notschema.txt is not valid JSON' + ] + ]; + } +}