Migrate page to abstract schema

Postgres:
 - Change page_namespace from smallint to int
 - Change page_random from numeric with arbitrary precision to float
 - Make page_touched not nullable

MySQL/SQLite:
 - Change datatype of page_title from varchar (with binary collation)
   to varbinary(255)
 - Drop default empty string from timestamp field of page_touched

Bug: T230428
Bug: T164898
Change-Id: Ibdaf332ea1da309d31d35a6ebbc1b8fefced335e
This commit is contained in:
Ammar Abdulhamid 2020-11-23 05:33:36 +01:00 committed by Ammarpad
parent b0f893f4ad
commit 6a3aa5b5a2
12 changed files with 247 additions and 106 deletions

View file

@ -250,6 +250,8 @@ class MysqlUpdater extends DatabaseUpdater {
'patch-recentchanges-rc_new_name_timestamp.sql' ],
[ 'dropDefault', 'archive', 'ar_timestamp' ],
[ 'modifyField', 'archive', 'ar_title', 'patch-archive-ar_title-varbinary.sql' ],
[ 'modifyField', 'page', 'page_title', 'patch-page-page_title-varbinary.sql' ],
[ 'dropDefault', 'page', 'page_touched' ],
];
}

View file

@ -449,7 +449,7 @@ class PostgresUpdater extends DatabaseUpdater {
[ 'changeNullableField', 'revision_actor_temp', 'revactor_page', 'NOT NULL', true ],
[ 'renameIndex', 'watchlist', 'namespace_title', 'wl_namespace_title' ],
[ 'dropFkey', 'page_props', 'pp_page' ],
// Moved from the Schema SQL file to here in 1.36
// page_props primary key change moved from the Schema SQL file to here in 1.36
[ 'changePrimaryKey', 'page_props', [ 'pp_page', 'pp_propname' ], 'page_props_pk' ],
[ 'setDefault','job', 'job_cmd', '' ],
[ 'changeField', 'job', 'job_namespace', 'INTEGER', '' ],
@ -622,6 +622,9 @@ class PostgresUpdater extends DatabaseUpdater {
[ 'renameIndex', 'mwuser', 'user_email_token_idx', 'user_email_token' ],
[ 'addPgIndex', 'mwuser', 'user_email', '(user_email)' ],
[ 'addPgIndex', 'mwuser', 'user_name', '(user_name)', true ],
[ 'changeField', 'page', 'page_namespace', 'INTEGER', '' ],
[ 'changeNullableField', 'page', 'page_touched', 'NOT NULL', true ],
[ 'changeField', 'page', 'page_random', 'FLOAT', '' ],
];
}

View file

@ -214,6 +214,8 @@ class SqliteUpdater extends DatabaseUpdater {
[ 'renameIndex', 'recentchanges', 'new_name_timestamp', 'rc_new_name_timestamp', false,
'patch-recentchanges-rc_new_name_timestamp.sql' ],
[ 'modifyField', 'archive', 'ar_title', 'patch-archive-ar_title-varbinary.sql' ],
[ 'modifyField', 'page', 'page_title', 'patch-page-page_title-varbinary.sql' ],
];
}

View file

@ -0,0 +1 @@
-- Redefine page.page_title as VARBINARY(255), keeping it NOT NULL.
ALTER TABLE /*_*/page
  MODIFY COLUMN page_title VARBINARY(255) NOT NULL;

View file

@ -870,3 +870,32 @@ CREATE INDEX ar_name_title_timestamp ON archive (
CREATE INDEX ar_actor_timestamp ON archive (ar_actor, ar_timestamp);
CREATE UNIQUE INDEX ar_revid_uniq ON archive (ar_rev_id);
-- Core of the wiki: each page has an entry here which identifies it by
-- title and contains some essential metadata.
CREATE TABLE page (
-- Unique identifier number. Preserved across edits and rename operations,
-- but not across deletions and recreations.
page_id SERIAL NOT NULL,
-- A page name is broken into a namespace and a title. The namespace keys
-- are UI-language-independent constants.
page_namespace INT NOT NULL,
-- The rest of the title, as text. Spaces are stored as underscores.
page_title TEXT NOT NULL,
-- Comma-separated set of permission keys indicating who can move or edit
-- the page.
page_restrictions TEXT DEFAULT NULL,
-- 1 indicates the article is a redirect.
page_is_redirect SMALLINT DEFAULT 0 NOT NULL,
-- 1 indicates this is a new entry, with only one edit. Not all pages with
-- one edit are new pages.
page_is_new SMALLINT DEFAULT 0 NOT NULL,
-- Random value between 0 and 1, used for Special:Randompage.
page_random FLOAT NOT NULL,
-- Updated whenever the page changes in a way requiring it to be
-- re-rendered, invalidating caches.
page_touched TIMESTAMPTZ NOT NULL,
-- Updated whenever the page is re-parsed and its link tracking tables have
-- been updated.
page_links_updated TIMESTAMPTZ DEFAULT NULL,
-- Key to revision.rev_id of the current revision. May be 0 during page
-- creation.
page_latest INT NOT NULL,
-- Uncompressed length in bytes of the page's current source text.
page_len INT NOT NULL,
-- Content model, see CONTENT_MODEL_XXX constants.
page_content_model TEXT DEFAULT NULL,
-- Page content language.
page_lang TEXT DEFAULT NULL,
PRIMARY KEY(page_id)
);
-- The title index. Always specify a namespace when searching by title so
-- that this index is used.
CREATE UNIQUE INDEX name_title ON page (page_namespace, page_title);
-- Index for Special:Random.
CREATE INDEX page_random ON page (page_random);
-- Questionable utility; used by ProofreadPage, possibly DynamicPageList.
CREATE INDEX page_len ON page (page_len);
-- Index for Special:Shortpages and Special:Longpages.
CREATE INDEX page_redirect_namespace_len ON page (
page_is_redirect, page_namespace,
page_len
);

View file

@ -36,35 +36,6 @@ CREATE INDEX user_email ON mwuser (user_email);
INSERT INTO mwuser
VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
CREATE SEQUENCE page_page_id_seq;
CREATE TABLE page (
page_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('page_page_id_seq'),
page_namespace SMALLINT NOT NULL,
page_title TEXT NOT NULL,
page_restrictions TEXT,
page_is_redirect SMALLINT NOT NULL DEFAULT 0,
page_is_new SMALLINT NOT NULL DEFAULT 0,
page_random NUMERIC(15,14) NOT NULL DEFAULT RANDOM(),
page_touched TIMESTAMPTZ,
page_links_updated TIMESTAMPTZ NULL,
page_latest INTEGER NOT NULL, -- FK?
page_len INTEGER NOT NULL,
page_content_model TEXT,
page_lang TEXT DEFAULT NULL
);
ALTER SEQUENCE page_page_id_seq OWNED BY page.page_id;
CREATE UNIQUE INDEX name_title ON page (page_namespace, page_title);
CREATE INDEX page_main_title ON page (page_title text_pattern_ops) WHERE page_namespace = 0;
CREATE INDEX page_talk_title ON page (page_title text_pattern_ops) WHERE page_namespace = 1;
CREATE INDEX page_user_title ON page (page_title text_pattern_ops) WHERE page_namespace = 2;
CREATE INDEX page_utalk_title ON page (page_title text_pattern_ops) WHERE page_namespace = 3;
CREATE INDEX page_project_title ON page (page_title text_pattern_ops) WHERE page_namespace = 4;
CREATE INDEX page_mediawiki_title ON page (page_title text_pattern_ops) WHERE page_namespace = 8;
CREATE INDEX page_random ON page (page_random);
CREATE INDEX page_len ON page (page_len);
CREATE INDEX page_redirect_namespace_len ON page (page_is_redirect, page_namespace, page_len);
CREATE FUNCTION page_deleted() RETURNS TRIGGER LANGUAGE plpgsql AS
$mw$
BEGIN

View file

@ -0,0 +1,34 @@
-- Rebuild the page table with the abstract-schema column definitions.
-- Existing rows are copied into a temporary table, the old table is
-- dropped, the temporary table is renamed into place, and the indexes are
-- then recreated on the renamed table.
CREATE TABLE /*_*/page_tmp (
  page_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
  page_namespace INTEGER NOT NULL,
  page_title BLOB NOT NULL,
  page_restrictions BLOB DEFAULT NULL,
  page_is_redirect SMALLINT DEFAULT 0 NOT NULL,
  page_is_new SMALLINT DEFAULT 0 NOT NULL,
  page_random DOUBLE PRECISION NOT NULL,
  page_touched BLOB NOT NULL,
  page_links_updated BLOB DEFAULT NULL,
  page_latest INTEGER UNSIGNED NOT NULL,
  page_len INTEGER UNSIGNED NOT NULL,
  page_content_model BLOB DEFAULT NULL,
  page_lang BLOB DEFAULT NULL
);

-- Name the target columns explicitly so the copy is matched by column name
-- rather than by the physical column order of the new table.
INSERT INTO /*_*/page_tmp (
  page_id, page_namespace, page_title, page_restrictions,
  page_is_redirect, page_is_new, page_random, page_touched,
  page_links_updated, page_latest, page_len, page_content_model,
  page_lang
)
SELECT
  page_id, page_namespace, page_title, page_restrictions,
  page_is_redirect, page_is_new, page_random, page_touched,
  page_links_updated, page_latest, page_len, page_content_model,
  page_lang
FROM /*_*/page;

-- Swap the rebuilt table into place.
DROP TABLE /*_*/page;
ALTER TABLE /*_*/page_tmp RENAME TO /*_*/page;

-- Recreate the indexes under their original names on the renamed table.
CREATE UNIQUE INDEX name_title ON /*_*/page (page_namespace, page_title);
CREATE INDEX page_random ON /*_*/page (page_random);
CREATE INDEX page_len ON /*_*/page (page_len);
CREATE INDEX page_redirect_namespace_len ON /*_*/page (
  page_is_redirect, page_namespace,
  page_len
);

View file

@ -812,3 +812,29 @@ CREATE INDEX ar_name_title_timestamp ON /*_*/archive (
CREATE INDEX ar_actor_timestamp ON /*_*/archive (ar_actor, ar_timestamp);
CREATE UNIQUE INDEX ar_revid_uniq ON /*_*/archive (ar_rev_id);
-- Core of the wiki: each page has an entry here which identifies it by
-- title and contains some essential metadata.
CREATE TABLE /*_*/page (
-- Unique identifier number. Preserved across edits and rename operations,
-- but not across deletions and recreations.
page_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
-- Namespace key and the rest of the title; spaces in the title are stored
-- as underscores.
page_namespace INTEGER NOT NULL, page_title BLOB NOT NULL,
-- Comma-separated set of permission keys indicating who can move or edit
-- the page.
page_restrictions BLOB DEFAULT NULL,
-- 1 indicates the article is a redirect.
page_is_redirect SMALLINT DEFAULT 0 NOT NULL,
-- 1 indicates this is a new entry, with only one edit. Not all pages with
-- one edit are new pages.
page_is_new SMALLINT DEFAULT 0 NOT NULL,
-- Random value between 0 and 1, used for Special:Randompage.
page_random DOUBLE PRECISION NOT NULL,
-- page_touched: updated whenever the page must be re-rendered.
-- page_links_updated: updated whenever the page is re-parsed and its link
-- tracking tables have been updated.
page_touched BLOB NOT NULL, page_links_updated BLOB DEFAULT NULL,
-- Key to revision.rev_id of the current revision. May be 0 during page
-- creation.
page_latest INTEGER UNSIGNED NOT NULL,
-- Uncompressed length in bytes of the page's current source text.
page_len INTEGER UNSIGNED NOT NULL,
-- Content model, see CONTENT_MODEL_XXX constants.
page_content_model BLOB DEFAULT NULL,
-- Page content language.
page_lang BLOB DEFAULT NULL
);
-- The title index. Always specify a namespace when searching by title so
-- that this index is used.
CREATE UNIQUE INDEX name_title ON /*_*/page (page_namespace, page_title);
-- Index for Special:Random.
CREATE INDEX page_random ON /*_*/page (page_random);
-- Questionable utility; used by ProofreadPage, possibly DynamicPageList.
CREATE INDEX page_len ON /*_*/page (page_len);
-- Index for Special:Shortpages and Special:Longpages.
CREATE INDEX page_redirect_namespace_len ON /*_*/page (
page_is_redirect, page_namespace,
page_len
);

View file

@ -809,3 +809,28 @@ CREATE TABLE /*_*/archive (
UNIQUE INDEX ar_revid_uniq (ar_rev_id),
PRIMARY KEY(ar_id)
) /*$wgDBTableOptions*/;
-- Core of the wiki: each page has an entry here which identifies it by
-- title and contains some essential metadata.
CREATE TABLE /*_*/page (
-- Unique identifier number. Preserved across edits and rename operations,
-- but not across deletions and recreations.
page_id INT UNSIGNED AUTO_INCREMENT NOT NULL,
-- A page name is broken into a namespace and a title. The namespace keys
-- are UI-language-independent constants.
page_namespace INT NOT NULL,
-- The rest of the title, as text. Spaces are stored as underscores.
page_title VARBINARY(255) NOT NULL,
-- Comma-separated set of permission keys indicating who can move or edit
-- the page.
page_restrictions TINYBLOB DEFAULT NULL,
-- 1 indicates the article is a redirect.
page_is_redirect TINYINT DEFAULT 0 NOT NULL,
-- 1 indicates this is a new entry, with only one edit. Not all pages with
-- one edit are new pages.
page_is_new TINYINT DEFAULT 0 NOT NULL,
-- Random value between 0 and 1, used for Special:Randompage.
page_random DOUBLE PRECISION UNSIGNED NOT NULL,
-- Updated whenever the page changes in a way requiring it to be
-- re-rendered, invalidating caches.
page_touched BINARY(14) NOT NULL,
-- Updated whenever the page is re-parsed and its link tracking tables have
-- been updated.
page_links_updated VARBINARY(14) DEFAULT NULL,
-- Key to revision.rev_id of the current revision. May be 0 during page
-- creation.
page_latest INT UNSIGNED NOT NULL,
-- Uncompressed length in bytes of the page's current source text.
page_len INT UNSIGNED NOT NULL,
-- Content model, see CONTENT_MODEL_XXX constants.
page_content_model VARBINARY(32) DEFAULT NULL,
-- Page content language.
page_lang VARBINARY(35) DEFAULT NULL,
-- The title index. Always specify a namespace when searching by title so
-- that this index is used.
UNIQUE INDEX name_title (page_namespace, page_title),
-- Index for Special:Random.
INDEX page_random (page_random),
-- Questionable utility; used by ProofreadPage, possibly DynamicPageList.
INDEX page_len (page_len),
-- Index for Special:Shortpages and Special:Longpages.
INDEX page_redirect_namespace_len (
page_is_redirect, page_namespace,
page_len
),
PRIMARY KEY(page_id)
) /*$wgDBTableOptions*/;

View file

@ -3312,5 +3312,128 @@
"pk": [
"ar_id"
]
},
{
"name": "page",
"comment": "Core of the wiki: each page has an entry here which identifies it by title and contains some essential metadata.",
"columns": [
{
"name": "page_id",
"comment": "Unique identifier number. The page_id will be preserved across edits and rename operations, but not deletions and recreations.",
"type": "integer",
"options": { "unsigned": true, "notnull": true, "autoincrement": true }
},
{
"name": "page_namespace",
"comment": "A page name is broken into a namespace and a title. The namespace keys are UI-language-independent constants, defined in includes/Defines.php",
"type": "integer",
"options": { "notnull": true }
},
{
"name": "page_title",
"comment": "The rest of the title, as text. Spaces are transformed into underscores in title storage.",
"type": "binary",
"options": { "notnull": true, "length": 255 }
},
{
"name": "page_restrictions",
"comment": "Comma-separated set of permission keys indicating who can move or edit the page.",
"type": "blob",
"options": { "notnull": false, "length": 255 }
},
{
"name": "page_is_redirect",
"comment": "1 indicates the article is a redirect.",
"type": "mwtinyint",
"options": { "notnull": true, "default": 0 }
},
{
"name": "page_is_new",
"comment": "1 indicates this is a new entry, with only one edit. Not all pages with one edit are new pages.",
"type": "mwtinyint",
"options": { "notnull": true, "default": 0 }
},
{
"name": "page_random",
"comment": "Random value between 0 and 1, used for Special:Randompage",
"type": "float",
"options": {
"notnull": true,
"unsigned": true,
"CustomSchemaOptions": {
"doublePrecision": true
}
}
},
{
"name": "page_touched",
"comment": "This timestamp is updated whenever the page changes in a way requiring it to be re-rendered, invalidating caches. Aside from editing this includes permission changes, creation or deletion of linked pages, and alteration of contained templates.",
"type": "mwtimestamp",
"options": { "notnull": true }
},
{
"name": "page_links_updated",
"comment": "This timestamp is updated whenever a page is re-parsed and it has all the link tracking tables updated for it. This is useful for de-duplicating expensive backlink update jobs.",
"type": "mwtimestamp",
"options": {
"notnull": false,
"default": null,
"CustomSchemaOptions": {
"allowInfinite": true
}
}
},
{
"name": "page_latest",
"comment": "Handy key to revision.rev_id of the current revision. This may be 0 during page creation, but that shouldn't happen outside of a transaction... hopefully.",
"type": "integer",
"options": { "unsigned": true, "notnull": true }
},
{
"name": "page_len",
"comment": "Uncompressed length in bytes of the page's current source text.",
"type": "integer",
"options": { "unsigned": true, "notnull": true }
},
{
"name": "page_content_model",
"comment": "content model, see CONTENT_MODEL_XXX constants",
"type": "binary",
"options": { "length": 32, "notnull": false }
},
{
"name": "page_lang",
"comment": "Page content language",
"type": "binary",
"options": { "length": 35, "notnull": false }
}
],
"indexes": [
{
"name": "name_title",
"columns": [ "page_namespace", "page_title" ],
"comment": "The title index. Care must be taken to always specify a namespace when searching by title, so that the index is used. Even listing all known namespaces with IN() is better than omitting page_namespace from the WHERE clause.",
"unique": true
},
{
"name": "page_random",
"columns": [ "page_random" ],
"comment": "Index for Special:Random",
"unique": false
},
{
"name": "page_len",
"columns": [ "page_len" ],
"comment": "Questionable utility, used by ProofreadPage, possibly DynamicPageList. ApiQueryAllPages unconditionally filters on namespace and so hopefully does not use it.",
"unique": false
},
{
"name": "page_redirect_namespace_len",
"columns": [ "page_is_redirect", "page_namespace", "page_len" ],
"comment": "The index for Special:Shortpages and Special:Longpages. Also SiteStats::articles() in 'comma' counting mode, MessageCache::loadFromDB().",
"unique": false
}
],
"pk": [ "page_id" ]
}
]

View file

@ -139,82 +139,6 @@ CREATE UNIQUE INDEX /*i*/user_name ON /*_*/user (user_name);
CREATE INDEX /*i*/user_email_token ON /*_*/user (user_email_token);
CREATE INDEX /*i*/user_email ON /*_*/user (user_email(50));
--
-- Core of the wiki: each page has an entry here which identifies
-- it by title and contains some essential metadata.
--
CREATE TABLE /*_*/page (
-- Unique identifier number. The page_id will be preserved across
-- edits and rename operations, but not deletions and recreations.
page_id int unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
-- A page name is broken into a namespace and a title.
-- The namespace keys are UI-language-independent constants,
-- defined in includes/Defines.php
page_namespace int NOT NULL,
-- The rest of the title, as text.
-- Spaces are transformed into underscores in title storage.
page_title varchar(255) binary NOT NULL,
-- Comma-separated set of permission keys indicating who
-- can move or edit the page.
page_restrictions tinyblob NULL,
-- 1 indicates the article is a redirect.
page_is_redirect tinyint unsigned NOT NULL default 0,
-- 1 indicates this is a new entry, with only one edit.
-- Not all pages with one edit are new pages.
page_is_new tinyint unsigned NOT NULL default 0,
-- Random value between 0 and 1, used for Special:Randompage
page_random real unsigned NOT NULL,
-- This timestamp is updated whenever the page changes in
-- a way requiring it to be re-rendered, invalidating caches.
-- Aside from editing this includes permission changes,
-- creation or deletion of linked pages, and alteration
-- of contained templates.
page_touched binary(14) NOT NULL default '',
-- This timestamp is updated whenever a page is re-parsed and
-- it has all the link tracking tables updated for it. This is
-- useful for de-duplicating expensive backlink update jobs.
page_links_updated varbinary(14) NULL default NULL,
-- Handy key to revision.rev_id of the current revision.
-- This may be 0 during page creation, but that shouldn't
-- happen outside of a transaction... hopefully.
page_latest int unsigned NOT NULL,
-- Uncompressed length in bytes of the page's current source text.
page_len int unsigned NOT NULL,
-- content model, see CONTENT_MODEL_XXX constants
page_content_model varbinary(32) DEFAULT NULL,
-- Page content language
page_lang varbinary(35) DEFAULT NULL
) /*$wgDBTableOptions*/;
-- The title index. Care must be taken to always specify a namespace when
-- by title, so that the index is used. Even listing all known namespaces
-- with IN() is better than omitting page_namespace from the WHERE clause.
CREATE UNIQUE INDEX /*i*/name_title ON /*_*/page (page_namespace,page_title);
-- The index for Special:Random
CREATE INDEX /*i*/page_random ON /*_*/page (page_random);
-- Questionable utility, used by ProofreadPage, possibly DynamicPageList.
-- ApiQueryAllPages unconditionally filters on namespace and so hopefully does
-- not use it.
CREATE INDEX /*i*/page_len ON /*_*/page (page_len);
-- The index for Special:Shortpages and Special:Longpages. Also SiteStats::articles()
-- in 'comma' counting mode, MessageCache::loadFromDB().
CREATE INDEX /*i*/page_redirect_namespace_len ON /*_*/page (page_is_redirect, page_namespace, page_len);
--
-- Every edit of a page creates also a revision row.
-- This stores metadata about the revision, and a reference

View file

@ -33,6 +33,7 @@ class DatabaseIntegrationTest extends MediaWikiIntegrationTestCase {
'revision_actor_temp',
'change_tag',
'objectcache',
'page'
];
$prefixes = [];