Migrate job to abstract schema

Postgres:
 - Drop default from job_id
 - Add default to job_cmd
 - Change job_namespace datatype to INTEGER
 - Drop job_cmd_namespace_title index
 - Add job_cmd index on the same fields as MySQL
 - Rename index job_timestamp_idx

MySQL:
 - Change job_title datatype to VARBINARY
 - Change job_timestamp datatype to BINARY (mwtimestamp)
 - Change job_token_timestamp datatype to BINARY (mwtimestamp)

Bug: T230428
Bug: T164898
Bug: T42626
Change-Id: I207aefc48c7bcbb5b5362af4b63d9a1383019a6d
This commit is contained in:
Ammar Abdulhamid 2020-11-18 06:50:39 +01:00 committed by Amir Sarabadani
parent 69d7bf0cfe
commit 40ebc05ce4
14 changed files with 192 additions and 66 deletions

View file

@ -466,6 +466,9 @@ class MysqlUpdater extends DatabaseUpdater {
[ 'modifyField', 'ipblocks_restrictions', 'ir_type', 'patch-ipblocks_restrictions-ir_type.sql' ],
[ 'renameIndex', 'watchlist', 'namespace_title', 'wl_namespace_title', false,
'patch-watchlist-namespace_title-rename-index.sql' ],
[ 'modifyField', 'job', 'job_title', 'patch-job-job_title-varbinary.sql' ],
[ 'modifyField', 'job', 'job_timestamp', 'patch-job_job_timestamp.sql' ],
[ 'modifyField', 'job', 'job_token_timestamp', 'patch-job_job_token_timestamp.sql' ],
];
}

View file

@ -819,6 +819,11 @@ class PostgresUpdater extends DatabaseUpdater {
[ 'dropFkey', 'page_props', 'pp_page' ],
// Moved from the Schema SQL file to here in 1.36
[ 'changePrimaryKey', 'page_props', [ 'pp_page', 'pp_propname' ], 'page_props_pk' ],
[ 'setDefault','job', 'job_cmd', '' ],
[ 'changeField', 'job', 'job_namespace', 'INTEGER', '' ],
[ 'dropPgIndex', 'job', 'job_cmd_namespace_title' ],
[ 'addPgIndex', 'job', 'job_cmd', '(job_cmd, job_namespace, job_title, job_params)' ],
[ 'renameIndex', 'job', 'job_timestamp_idx', 'job_timestamp' ],
];
}

View file

@ -315,6 +315,7 @@ class SqliteUpdater extends DatabaseUpdater {
[ 'modifyField', 'revision_actor_temp', 'revactor_timestamp', 'patch-revactor_timestamp-drop-default.sql' ],
[ 'renameIndex', 'watchlist', 'namespace_title', 'wl_namespace_title', false,
'patch-watchlist-namespace_title-rename-index.sql' ],
[ 'modifyField', 'job', 'job_title', 'patch-job-job_title-varbinary.sql' ],
];
}

View file

@ -0,0 +1 @@
-- Redefine job.job_title as VARBINARY(255); the column keeps its NOT NULL
-- constraint.
ALTER TABLE /*_*/job
  MODIFY job_title VARBINARY(255) NOT NULL;

View file

@ -0,0 +1,2 @@
-- Redefine job.job_timestamp as a fixed-length BINARY(14) column. No NOT NULL
-- is specified, so the column accepts NULL.
ALTER TABLE /*_*/job MODIFY job_timestamp BINARY(14);

View file

@ -0,0 +1,2 @@
-- Redefine job.job_token_timestamp as a fixed-length BINARY(14) column. No
-- NOT NULL is specified, so the column accepts NULL.
ALTER TABLE /*_*/job MODIFY job_token_timestamp BINARY(14);

View file

@ -86,6 +86,7 @@ class GenerateSchemaSql extends Maintenance {
if ( $platform === 'postgres' ) {
// Remove table prefixes from Postgres schema, people should not set it
// but better safe than sorry.
$sql = str_replace( "\n /*_*/\n ", ' ', $sql );
$sql = str_replace( "\n/*_*/\n", ' ', $sql );
// MySQL goes with varbinary for collation reasons, but postgres can't

View file

@ -509,3 +509,32 @@ CREATE UNIQUE INDEX pp_propname_page ON page_props (pp_propname, pp_page);
CREATE UNIQUE INDEX pp_propname_sortkey_page ON page_props (pp_propname, pp_sortkey, pp_page)
WHERE
(pp_sortkey IS NOT NULL);
-- Jobs performed by parallel apache threads or a command-line daemon
CREATE TABLE job (
job_id SERIAL NOT NULL,
-- Command name
job_cmd TEXT DEFAULT '' NOT NULL,
-- Namespace to act on. Should be 0 if the command does not operate on a title
job_namespace INT NOT NULL,
-- Title to act on. Should be '' if the command does not operate on a title
job_title TEXT NOT NULL,
-- Timestamp of when the job was inserted.
-- NULL for jobs added before addition of the timestamp
job_timestamp TIMESTAMPTZ DEFAULT NULL,
-- Any other parameters to the command. Stored as a PHP serialized array,
-- or an empty string if there are no parameters
job_params TEXT NOT NULL,
-- Random, non-unique, number used for job acquisition (for lock concurrency)
job_random INT DEFAULT 0 NOT NULL,
-- The number of times this job has been locked
job_attempts INT DEFAULT 0 NOT NULL,
-- Field that conveys process locks on rows via process UUIDs
job_token TEXT DEFAULT '' NOT NULL,
-- Timestamp when the job was locked
job_token_timestamp TIMESTAMPTZ DEFAULT NULL,
-- Base 36 SHA1 of the job parameters relevant to detecting duplicates
job_sha1 TEXT DEFAULT '' NOT NULL,
PRIMARY KEY(job_id)
);
-- Index on the duplicate-detection hash
CREATE INDEX job_sha1 ON job (job_sha1);
CREATE INDEX job_cmd_token ON job (job_cmd, job_token, job_random);
CREATE INDEX job_cmd_token_id ON job (job_cmd, job_token, job_id);
-- Covers command, target namespace/title and parameters
CREATE INDEX job_cmd ON job (
job_cmd, job_namespace, job_title,
job_params
);
CREATE INDEX job_timestamp ON job (job_timestamp);

View file

@ -352,28 +352,6 @@ CREATE INDEX logging_page_id_time ON logging (log_page, log_timestamp);
CREATE INDEX logging_actor_time ON logging (log_actor, log_timestamp);
CREATE INDEX logging_type_action ON logging (log_type, log_action, log_timestamp);
-- Jobs table. job_id takes its default value from the dedicated sequence
-- created here; the ALTER SEQUENCE ... OWNED BY statement after the table
-- ties that sequence to the column.
CREATE SEQUENCE job_job_id_seq;
CREATE TABLE job (
job_id INTEGER NOT NULL PRIMARY KEY DEFAULT nextval('job_job_id_seq'),
-- Command name
job_cmd TEXT NOT NULL,
-- Namespace and title to act on
job_namespace SMALLINT NOT NULL,
job_title TEXT NOT NULL,
-- Nullable: no NOT NULL constraint on the insertion timestamp
job_timestamp TIMESTAMPTZ,
job_params TEXT NOT NULL,
job_random INTEGER NOT NULL DEFAULT 0,
job_attempts INTEGER NOT NULL DEFAULT 0,
job_token TEXT NOT NULL DEFAULT '',
-- Nullable: no NOT NULL constraint on the lock timestamp
job_token_timestamp TIMESTAMPTZ,
job_sha1 TEXT NOT NULL DEFAULT ''
);
ALTER SEQUENCE job_job_id_seq OWNED BY job.job_id;
CREATE INDEX job_sha1 ON job (job_sha1);
CREATE INDEX job_cmd_token ON job (job_cmd, job_token, job_random);
CREATE INDEX job_cmd_token_id ON job (job_cmd, job_token, job_id);
CREATE INDEX job_cmd_namespace_title ON job (job_cmd, job_namespace, job_title);
CREATE INDEX job_timestamp_idx ON job (job_timestamp);
-- Tsearch2 2 stuff. Will fail if we don't have proper access to the tsearch2 tables
-- Make sure you also change patch-tsearch2funcs.sql if the funcs below change.

View file

@ -0,0 +1,15 @@
-- Rebuild the redirect table through a temporary copy: create the new
-- definition under a temporary name, copy every row across, drop the old
-- table, rename the copy into place and recreate the index.
--
-- NOTE(review): this patch rebuilds `redirect`, whereas the accompanying
-- change description only talks about the `job` table. Confirm that
-- `redirect` is really the intended target of this file.
CREATE TABLE /*_*/redirect_tmp (
  rd_from INTEGER UNSIGNED DEFAULT 0 NOT NULL,
  rd_namespace INTEGER DEFAULT 0 NOT NULL,
  rd_title BLOB DEFAULT '' NOT NULL,
  rd_interwiki VARCHAR(32) DEFAULT NULL,
  rd_fragment BLOB DEFAULT NULL,
  PRIMARY KEY(rd_from)
);

-- Name the target columns explicitly so the copy does not depend on the
-- physical column order of redirect_tmp.
INSERT INTO /*_*/redirect_tmp (
  rd_from, rd_namespace, rd_title, rd_interwiki, rd_fragment
)
SELECT rd_from, rd_namespace, rd_title, rd_interwiki, rd_fragment
FROM /*_*/redirect;

DROP TABLE /*_*/redirect;
ALTER TABLE /*_*/redirect_tmp RENAME TO /*_*/redirect;

-- Dropping the old table also dropped its index, so create it again on the
-- renamed table.
CREATE INDEX rd_ns_title ON /*_*/redirect (rd_namespace, rd_title, rd_from);

View file

@ -488,3 +488,28 @@ CREATE TABLE /*_*/page_props (
CREATE UNIQUE INDEX pp_propname_page ON /*_*/page_props (pp_propname, pp_page);
CREATE UNIQUE INDEX pp_propname_sortkey_page ON /*_*/page_props (pp_propname, pp_sortkey, pp_page);
-- Jobs performed by parallel apache threads or a command-line daemon
CREATE TABLE /*_*/job (
job_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
-- job_cmd: command name.
-- job_namespace: namespace to act on; should be 0 if the command does not
-- operate on a title.
job_cmd BLOB DEFAULT '' NOT NULL, job_namespace INTEGER NOT NULL,
-- job_title: title to act on; should be '' if the command does not operate
-- on a title.
-- job_timestamp: when the job was inserted; NULL for jobs added before
-- addition of the timestamp.
job_title BLOB NOT NULL, job_timestamp BLOB DEFAULT NULL,
-- job_params: any other parameters to the command, stored as a PHP
-- serialized array, or an empty string if there are no parameters.
-- job_random: random, non-unique, number used for job acquisition (for lock
-- concurrency).
job_params BLOB NOT NULL, job_random INTEGER UNSIGNED DEFAULT 0 NOT NULL,
-- The number of times this job has been locked
job_attempts INTEGER UNSIGNED DEFAULT 0 NOT NULL,
-- Field that conveys process locks on rows via process UUIDs
job_token BLOB DEFAULT '' NOT NULL,
-- Timestamp when the job was locked
job_token_timestamp BLOB DEFAULT NULL,
-- Base 36 SHA1 of the job parameters relevant to detecting duplicates
job_sha1 BLOB DEFAULT '' NOT NULL
);
-- Index on the duplicate-detection hash
CREATE INDEX job_sha1 ON /*_*/job (job_sha1);
CREATE INDEX job_cmd_token ON /*_*/job (job_cmd, job_token, job_random);
CREATE INDEX job_cmd_token_id ON /*_*/job (job_cmd, job_token, job_id);
-- Covers command, target namespace/title and parameters
CREATE INDEX job_cmd ON /*_*/job (
job_cmd, job_namespace, job_title,
job_params
);
CREATE INDEX job_timestamp ON /*_*/job (job_timestamp);

View file

@ -452,3 +452,29 @@ CREATE TABLE /*_*/page_props (
UNIQUE INDEX pp_propname_sortkey_page (pp_propname, pp_sortkey, pp_page),
PRIMARY KEY(pp_page, pp_propname)
) /*$wgDBTableOptions*/;
-- Jobs performed by parallel apache threads or a command-line daemon
CREATE TABLE /*_*/job (
job_id INT UNSIGNED AUTO_INCREMENT NOT NULL,
-- Command name. Limited to 60 to prevent key length overflow
job_cmd VARBINARY(60) DEFAULT '' NOT NULL,
-- Namespace to act on. Should be 0 if the command does not operate on a title
job_namespace INT NOT NULL,
-- Title to act on. Should be '' if the command does not operate on a title
job_title VARBINARY(255) NOT NULL,
-- Timestamp of when the job was inserted.
-- NULL for jobs added before addition of the timestamp
job_timestamp BINARY(14) DEFAULT NULL,
-- Any other parameters to the command. Stored as a PHP serialized array,
-- or an empty string if there are no parameters
job_params MEDIUMBLOB NOT NULL,
-- Random, non-unique, number used for job acquisition (for lock concurrency)
job_random INT UNSIGNED DEFAULT 0 NOT NULL,
-- The number of times this job has been locked
job_attempts INT UNSIGNED DEFAULT 0 NOT NULL,
-- Field that conveys process locks on rows via process UUIDs
job_token VARBINARY(32) DEFAULT '' NOT NULL,
-- Timestamp when the job was locked
job_token_timestamp BINARY(14) DEFAULT NULL,
-- Base 36 SHA1 of the job parameters relevant to detecting duplicates
job_sha1 VARBINARY(32) DEFAULT '' NOT NULL,
-- Index on the duplicate-detection hash
INDEX job_sha1 (job_sha1),
INDEX job_cmd_token (job_cmd, job_token, job_random),
INDEX job_cmd_token_id (job_cmd, job_token, job_id),
-- job_params is a MEDIUMBLOB, so only its first 128 bytes take part in
-- this index
INDEX job_cmd (
job_cmd,
job_namespace,
job_title,
job_params(128)
),
INDEX job_timestamp (job_timestamp),
PRIMARY KEY(job_id)
) /*$wgDBTableOptions*/;

View file

@ -1439,6 +1439,88 @@
{ "name": "pp_propname_sortkey_page", "columns": [ "pp_propname", "pp_sortkey", "pp_page" ], "unique": true, "options": { "where": "(pp_sortkey IS NOT NULL)" } }
],
"pk": [ "pp_page", "pp_propname" ]
},
{
"name": "job",
"comment": "Jobs performed by parallel apache threads or a command-line daemon",
"columns": [
{
"name": "job_id",
"type": "integer",
"options": { "unsigned": true, "notnull": true, "autoincrement": true }
},
{
"name": "job_cmd",
"comment": "Command name. Limited to 60 to prevent key length overflow",
"type": "binary",
"options": { "notnull": true, "default": "", "length": 60 }
},
{
"name": "job_namespace",
"comment": "Namespace to act on. Should be 0 if the command does not operate on a title",
"type": "integer",
"options": { "notnull": true }
},
{
"name": "job_title",
"comment": "Title to act on. Should be '' if the command does not operate on a title",
"type": "binary",
"options": { "notnull": true, "length": 255 }
},
{
"name": "job_timestamp",
"comment": "Timestamp of when the job was inserted. NULL for jobs added before addition of the timestamp",
"type": "mwtimestamp",
"options": { "notnull": false }
},
{
"name": "job_params",
"comment": "Any other parameters to the command. Stored as a PHP serialized array, or an empty string if there are no parameters",
"type": "blob",
"options": { "notnull": true, "length": 16777215 }
},
{
"name": "job_random",
"comment": "Random, non-unique, number used for job acquisition (for lock concurrency)",
"type": "integer",
"options": { "unsigned": true, "notnull": true, "default": 0 }
},
{
"name": "job_attempts",
"comment": "The number of times this job has been locked",
"type": "integer",
"options": { "unsigned": true, "notnull": true, "default": 0 }
},
{
"name": "job_token",
"comment": "Field that conveys process locks on rows via process UUIDs",
"type": "binary",
"options": { "notnull": true, "default": "", "length": 32 }
},
{
"name": "job_token_timestamp",
"comment": "Timestamp when the job was locked",
"type": "mwtimestamp",
"options": { "notnull": false }
},
{
"name": "job_sha1",
"comment": "Base 36 SHA1 of the job parameters relevant to detecting duplicates",
"type": "binary",
"options": { "notnull": true, "length": 32, "default": "" }
}
],
"indexes": [
{ "name": "job_sha1", "columns": [ "job_sha1" ], "unique": false },
{ "name": "job_cmd_token", "columns": [ "job_cmd", "job_token", "job_random" ], "unique": false },
{ "name": "job_cmd_token_id", "columns": [ "job_cmd", "job_token", "job_id" ], "unique": false },
{ "name": "job_cmd", "columns": [ "job_cmd", "job_namespace", "job_title", "job_params" ],
"unique": false,
"options": { "lengths": [ null, null, null, 128 ] }
},
{ "name": "job_timestamp", "columns": [ "job_timestamp" ], "unique": false }
],
"pk": [ "job_id" ]
}
]

View file

@ -958,48 +958,4 @@ CREATE INDEX /*i*/log_page_id_time ON /*_*/logging (log_page,log_timestamp);
-- Special:Log action filter
CREATE INDEX /*i*/log_type_action ON /*_*/logging (log_type, log_action, log_timestamp);
-- Jobs performed by parallel apache threads or a command-line daemon
CREATE TABLE /*_*/job (
-- Auto-incrementing primary key
job_id int unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
-- Command name
-- Limited to 60 to prevent key length overflow
job_cmd varbinary(60) NOT NULL default '',
-- Namespace and title to act on
-- Should be 0 and '' if the command does not operate on a title
job_namespace int NOT NULL,
job_title varchar(255) binary NOT NULL,
-- Timestamp of when the job was inserted
-- NULL for jobs added before addition of the timestamp
job_timestamp varbinary(14) NULL default NULL,
-- Any other parameters to the command
-- Stored as a PHP serialized array, or an empty string if there are no parameters
job_params mediumblob NOT NULL,
-- Random, non-unique, number used for job acquisition (for lock concurrency)
job_random integer unsigned NOT NULL default 0,
-- The number of times this job has been locked
job_attempts integer unsigned NOT NULL default 0,
-- Field that conveys process locks on rows via process UUIDs
job_token varbinary(32) NOT NULL default '',
-- Timestamp when the job was locked
job_token_timestamp varbinary(14) NULL default NULL,
-- Base 36 SHA1 of the job parameters relevant to detecting duplicates
job_sha1 varbinary(32) NOT NULL default ''
) /*$wgDBTableOptions*/;
-- Index on the duplicate-detection hash
CREATE INDEX /*i*/job_sha1 ON /*_*/job (job_sha1);
CREATE INDEX /*i*/job_cmd_token ON /*_*/job (job_cmd,job_token,job_random);
CREATE INDEX /*i*/job_cmd_token_id ON /*_*/job (job_cmd,job_token,job_id);
-- Only the first 128 bytes of job_params take part in this index
CREATE INDEX /*i*/job_cmd ON /*_*/job (job_cmd, job_namespace, job_title, job_params(128));
CREATE INDEX /*i*/job_timestamp ON /*_*/job (job_timestamp);
-- vim: sw=2 sts=2 et