Drop two unused Perl scripts
Probably nobody is using these. Also, once the migration to the abstract schema is complete, there should no longer be any unnecessary drift between the MySQL and Postgres tables, which obsoletes compare_schemas.pl. For mysql2postgres.pl, dumpBackup.php/importDump.php can be used instead.

Bug: T258876
Change-Id: Ief36bca6f3a387f811408f2a5e4840656dfffff2
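As a replacement for mysql2postgres.pl, the dump/import cycle mentioned above looks roughly like this (a sketch only: the scripts are the standard MediaWiki maintenance scripts, but the file name and the rebuild step are illustrative):

    # On the MySQL-backed wiki: dump all pages with full history as XML
    php maintenance/dumpBackup.php --full > pages.xml

    # On the Postgres-backed wiki: import the XML dump
    php maintenance/importDump.php pages.xml

    # Rebuild derived data such as recent changes afterwards
    php maintenance/rebuildrecentchanges.php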
parent 9e644bbac4
commit 0e7bf93d48
3 changed files with 0 additions and 1011 deletions
maintenance/postgres/README
@@ -74,16 +74,6 @@ referenced directly by the code (unlike sequence names). Most of
the indexes in the file as of this writing are there due to production
testing of expensive queries on a busy wiki.

== Keeping in sync with tables.sql ==

The script maintenance/postgres/compare_schemas.pl should be
periodically run. It will parse both "tables.sql" files and
produce any differences found. Such differences should be fixed
or exceptions specifically carved out by editing the script
itself. This script has also been very useful in finding problems
in maintenance/tables.sql itself, as it is very strict in the
format it expects things to be in. :)

== MySQL differences ==

The major differences between MySQL and Postgres are represented as
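For reference, the removed checker took no arguments and, per its own header comment, had to be run from inside maintenance/postgres; a typical (now historical) invocation:

    cd maintenance/postgres
    perl compare_schemas.pl    # prints any table/column/type-mapping drift it finds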
maintenance/postgres/compare_schemas.pl
@@ -1,565 +0,0 @@
#!/usr/bin/perl

## Rough check that the base and postgres "tables.sql" are in sync
## Should be run from maintenance/postgres
## Checks a few other things as well...

use strict;
use warnings;
use Data::Dumper;
use Cwd;

#check_valid_sql();

my @old = ('../tables.sql');
my $new = 'tables.sql';
my @xfile;

## Read in exceptions and other metadata
my %ok;
while (<DATA>) {
    next unless /^(\w+)\s*:\s*([^#]+)/;
    my ($name,$val) = ($1,$2);
    chomp $val;
    if ($name eq 'RENAME') {
        die "Invalid rename\n" unless $val =~ /(\w+)\s+(\w+)/;
        $ok{OLD}{$1} = $2;
        $ok{NEW}{$2} = $1;
        next;
    }
    if ($name eq 'XFILE') {
        push @xfile, $val;
        next;
    }
    for (split /\s+/ => $val) {
        $ok{$name}{$_} = 0;
    }
}

my $datatype = join '|' => qw(
    bool
    tinyint smallint int bigint real float
    tinytext mediumtext text char varchar varbinary binary
    timestamp datetime
    tinyblob mediumblob blob
);
$datatype .= q{|ENUM\([\"\w\', ]+\)};
$datatype = qr{($datatype)};

my $typeval = qr{(\(\d+\))?};

my $typeval2 = qr{ signed| unsigned| binary| NOT NULL| NULL| PRIMARY KEY| AUTO_INCREMENT| default ['\-\d\w"]+| REFERENCES .+CASCADE};

my $indextype = join '|' => qw(INDEX KEY FULLTEXT), 'PRIMARY KEY', 'UNIQUE INDEX', 'UNIQUE KEY';
$indextype = qr{$indextype};

my $engine = qr{TYPE|ENGINE};

my $tabletype = qr{InnoDB|MyISAM|HEAP|HEAP MAX_ROWS=\d+|InnoDB MAX_ROWS=\d+ AVG_ROW_LENGTH=\d+};

my $charset = qr{utf8|binary};

open my $newfh, '<', $new or die qq{Could not open $new: $!\n};


my ($table,%old);

## Read in the xfiles
my %xinfo;
for my $xfile (@xfile) {
    print "Loading $xfile\n";
    my $info = parse_sql($xfile);
    for (keys %$info) {
        $xinfo{$_} = $info->{$_};
    }
}

for my $oldfile (@old) {
    print "Loading $oldfile\n";
    my $info = parse_sql($oldfile);
    for (keys %xinfo) {
        $info->{$_} = $xinfo{$_};
    }
    $old{$oldfile} = $info;
}

sub parse_sql {

    my $oldfile = shift;

    open my $oldfh, '<', $oldfile or die qq{Could not open $oldfile: $!\n};

    my %info;
    while (<$oldfh>) {
        next if /^\s*\-\-/ or /^\s+$/;
        s/\s*\-\- [\w ]+$//;
        chomp;

        if (/CREATE\s*TABLE/i) {
            if (m{^CREATE TABLE /\*_\*/(\w+) \($}) {
                $table = $1;
            }
            elsif (m{^CREATE TABLE /\*\$wgDBprefix\*/(\w+) \($}) {
                $table = $1;
            }
            else {
                die qq{Invalid CREATE TABLE at line $. of $oldfile\n};
            }
            $info{$table}{name}=$table;
        }
        elsif (m{^\) /\*\$wgDBTableOptions\*/}) {
            $info{$table}{engine} = 'ENGINE';
            $info{$table}{type} = 'variable';
        }
        elsif (/^\) ($engine)=($tabletype);$/) {
            $info{$table}{engine}=$1;
            $info{$table}{type}=$2;
        }
        elsif (/^\) ($engine)=($tabletype), DEFAULT CHARSET=($charset);$/) {
            $info{$table}{engine}=$1;
            $info{$table}{type}=$2;
            $info{$table}{charset}=$3;
        }
        elsif (/^ (\w+) $datatype$typeval$typeval2{0,4},?$/) {
            $info{$table}{column}{$1} = $2;
            my $extra = $3 || '';
            $info{$table}{columnfull}{$1} = "$2$extra";
        }
        elsif (m{^ UNIQUE KEY (\w+) \((.+?)\)}) {
        }
        elsif (m{^CREATE (?:UNIQUE )?(?:FULLTEXT )?INDEX /\*i\*/(\w+) ON /\*_\*/(\w+) \((.+?)\);}) {
        }
        elsif (m{^\s*PRIMARY KEY \([\w,]+\)}) {
        }
        else {
            die "Cannot parse line $. of $oldfile:\n$_\n";
        }

    }
    close $oldfh or die qq{Could not close "$oldfile": $!\n};

    return \%info;

} ## end of parse_sql

for my $oldfile (@old) {

## Begin non-standard indent

## MySQL sanity checks
for my $table (sort keys %{$old{$oldfile}}) {
    my $t = $old{$oldfile}{$table};
    if ($t->{engine} eq 'TYPE') {
        die "Invalid engine for $oldfile: $t->{engine}\n" unless $t->{name} eq 'profiling';
    }
    my $charset = $t->{charset} || '';
    if ($oldfile !~ /binary/ and $charset eq 'binary') {
        die "Invalid charset for $oldfile: $charset\n";
    }
}

my $dtypelist = join '|' => qw(
    SMALLINT INTEGER BIGINT NUMERIC SERIAL
    TEXT CHAR VARCHAR
    BYTEA
    TIMESTAMPTZ
    CIDR
);
my $dtype = qr{($dtypelist)};
my %new;
my ($infunction,$inview,$inrule,$lastcomma) = (0,0,0,0);
my %custom_type;
seek $newfh, 0, 0;
while (<$newfh>) {
    next if /^\s*\-\-/ or /^\s*$/;
    s/\s*\-\- [\w ']+$//;
    next if /^BEGIN;/ or /^SET / or /^COMMIT;/;
    next if /^CREATE SEQUENCE/;
    next if /^CREATE(?: UNIQUE)? INDEX/;
    next if /^CREATE FUNCTION/;
    next if /^CREATE TRIGGER/ or /^ FOR EACH ROW/;
    next if /^INSERT INTO/ or /^ VALUES \(/;
    next if /^ALTER TABLE/;
    next if /^DROP SEQUENCE/;
    next if /^DROP FUNCTION/;

    if (/^CREATE TYPE (\w+)/) {
        die "Type $1 declared more than once!\n" if $custom_type{$1}++;
        $dtype = qr{($dtypelist|$1)};
        next;
    }

    chomp;

    if (/^\$mw\$;?$/) {
        $infunction = $infunction ? 0 : 1;
        next;
    }
    next if $infunction;

    next if /^CREATE VIEW/ and $inview = 1;
    if ($inview) {
        /;$/ and $inview = 0;
        next;
    }

    next if /^CREATE RULE/ and $inrule = 1;
    if ($inrule) {
        /;$/ and $inrule = 0;
        next;
    }

    if (/^CREATE TABLE "?(\w+)"? \($/) {
        $table = $1;
        $new{$table}{name}=$table;
        $lastcomma = 1;
    }
    elsif (/^\);$/) {
        if ($lastcomma) {
            warn "Stray comma before line $.\n";
        }
    }
    elsif (/^ (\w+) +$dtype.*?(,?)(?: --.*)?$/) {
        $new{$table}{column}{$1} = $2;
        if (!$lastcomma) {
            print "Missing comma before line $. of $new\n";
        }
        $lastcomma = $3 ? 1 : 0;
    }
    elsif (m{^\s*PRIMARY KEY \([\w,]+\)}) {
        $lastcomma = 0;
    }
    else {
        die "Cannot parse line $. of $new:\n$_\n";
    }
}

## Which column types are okay to map from mysql to postgres?
my $COLMAP = q{
## INTS:
tinyint SMALLINT
int INTEGER SERIAL
smallint SMALLINT
bigint BIGINT
real NUMERIC
float NUMERIC

## TEXT:
varchar(15) TEXT
varchar(32) TEXT
varchar(70) TEXT
varchar(255) TEXT
varchar TEXT
text TEXT
tinytext TEXT
ENUM TEXT

## TIMESTAMPS:
varbinary(14) TIMESTAMPTZ
binary(14) TIMESTAMPTZ
datetime TIMESTAMPTZ
timestamp TIMESTAMPTZ

## BYTEA:
mediumblob BYTEA

## OTHER:
bool SMALLINT # Sigh

};
## Allow specific exceptions to the above
my $COLMAPOK = q{
## User inputted text strings:
ar_comment tinyblob TEXT
fa_description tinyblob TEXT
img_description tinyblob TEXT
ipb_reason tinyblob TEXT
log_action varbinary(32) TEXT
log_type varbinary(32) TEXT
oi_description tinyblob TEXT
rev_comment tinyblob TEXT
rc_log_action varbinary(255) TEXT
rc_log_type varbinary(255) TEXT

## Simple text-only strings:
ar_flags tinyblob TEXT
cf_name varbinary(255) TEXT
cf_value blob TEXT
ar_sha1 varbinary(32) TEXT
cl_collation varbinary(32) TEXT
cl_sortkey varbinary(230) TEXT
ct_params blob TEXT
fa_minor_mime varbinary(100) TEXT
fa_storage_group varbinary(16) TEXT # Just 'deleted' for now, should stay plain text
fa_storage_key varbinary(64) TEXT # sha1 plus text extension
ipb_address tinyblob TEXT # IP address or username
ipb_range_end tinyblob TEXT # hexadecimal
ipb_range_start tinyblob TEXT # hexadecimal
img_minor_mime varbinary(100) TEXT
lc_lang varbinary(32) TEXT
lc_value varbinary(32) TEXT
img_sha1 varbinary(32) TEXT
iw_wikiid varchar(64) TEXT
job_cmd varbinary(60) TEXT # Should we limit to 60 as well?
keyname varbinary(255) TEXT # No tablename prefix (objectcache)
ll_lang varbinary(20) TEXT # Language code
lc_value mediumblob TEXT
log_params blob TEXT # LF separated list of args
log_type varbinary(10) TEXT
ls_field varbinary(32) TEXT
md_deps mediumblob TEXT # JSON
md_module varbinary(255) TEXT
md_skin varbinary(32) TEXT
mr_blob mediumblob TEXT # JSON
mr_lang varbinary(32) TEXT
mr_resource varbinary(255) TEXT
mrl_message varbinary(255) TEXT
mrl_resource varbinary(255) TEXT
oi_minor_mime varbinary(100) TEXT
oi_sha1 varbinary(32) TEXT
old_flags tinyblob TEXT
old_text mediumblob TEXT
pp_propname varbinary(60) TEXT
pp_value blob TEXT
page_restrictions tinyblob TEXT # CSV string
pf_server varchar(30) TEXT
pr_level varbinary(60) TEXT
pr_type varbinary(60) TEXT
pt_create_perm varbinary(60) TEXT
pt_reason tinyblob TEXT
qc_type varbinary(32) TEXT
qcc_type varbinary(32) TEXT
qci_type varbinary(32) TEXT
rc_params blob TEXT
rev_sha1 varbinary(32) TEXT
rlc_to_blob blob TEXT
ts_tags blob TEXT
ufg_group varbinary(32) TEXT
ug_group varbinary(32) TEXT
ul_value blob TEXT
up_property varbinary(255) TEXT
up_value blob TEXT
us_sha1 varchar(31) TEXT
us_source_type varchar(50) TEXT
us_status varchar(50) TEXT
user_email_token binary(32) TEXT
user_ip varbinary(40) TEXT
user_newpassword tinyblob TEXT
user_options blob TEXT
user_password tinyblob TEXT
user_token binary(32) TEXT
iwl_prefix varbinary(20) TEXT

## Text URLs:
el_index blob TEXT
el_to blob TEXT
iw_api blob TEXT
iw_url blob TEXT
tb_url blob TEXT
tc_url varbinary(255) TEXT

## Deprecated or not yet used:
ar_text mediumblob TEXT
job_params blob TEXT
log_deleted tinyint INTEGER # Not used yet, but keep it INTEGER for safety
rc_type tinyint CHAR

## Number tweaking:
fa_bits int SMALLINT # bits per pixel
fa_height int SMALLINT
fa_width int SMALLINT # Hope we don't see an image this wide...
hc_id int BIGINT # Odd that site_stats is all bigint...
img_bits int SMALLINT # bits per image should stay sane
oi_bits int SMALLINT

## True binary fields, usually due to gzdeflate and/or serialize:
math_inputhash varbinary(16) BYTEA
math_outputhash varbinary(16) BYTEA

## Namespaces: not need for such a high range
ar_namespace int SMALLINT
job_namespace int SMALLINT
log_namespace int SMALLINT
page_namespace int SMALLINT
pl_namespace int SMALLINT
pt_namespace int SMALLINT
qc_namespace int SMALLINT
rc_namespace int SMALLINT
rd_namespace int SMALLINT
rlc_to_namespace int SMALLINT
tl_namespace int SMALLINT
wl_namespace int SMALLINT

## Easy enough to change if a wiki ever does grow this big:
ss_active_users bigint INTEGER
ss_good_articles bigint INTEGER
ss_total_edits bigint INTEGER
ss_total_pages bigint INTEGER
ss_users bigint INTEGER

## True IP - keep an eye on these, coders tend to make textual assumptions
rc_ip varbinary(40) CIDR # Want to keep an eye on this

## Others:
tc_time int TIMESTAMPTZ


};

my %colmap;
for (split /\n/ => $COLMAP) {
    next unless /^\w/;
    s/(.*?)#.*/$1/;
    my ($col,@maps) = split / +/, $_;
    for (@maps) {
        $colmap{$col}{$_} = 1;
    }
}

my %colmapok;
for (split /\n/ => $COLMAPOK) {
    next unless /^\w/;
    my ($col,$old,$new) = split / +/, $_;
    $colmapok{$col}{$old}{$new} = 1;
}

## Old but not new
for my $t (sort keys %{$old{$oldfile}}) {
    if (!exists $new{$t} and !exists $ok{OLD}{$t}) {
        print "Table not in $new: $t\n";
        next;
    }
    next if exists $ok{OLD}{$t} and !$ok{OLD}{$t};
    my $newt = exists $ok{OLD}{$t} ? $ok{OLD}{$t} : $t;
    my $oldcol = $old{$oldfile}{$t}{column};
    my $oldcolfull = $old{$oldfile}{$t}{columnfull};
    my $newcol = $new{$newt}{column};
    for my $c (keys %$oldcol) {
        if (!exists $newcol->{$c}) {
            print "Column $t.$c not in $new\n";
            next;
        }
    }
    for my $c (sort keys %$newcol) {
        if (!exists $oldcol->{$c}) {
            print "Column $t.$c not in $oldfile\n";
            next;
        }
        ## Column types (roughly) match up?
        my $new = $newcol->{$c};
        my $old = $oldcolfull->{$c};

        ## Known exceptions:
        next if exists $colmapok{$c}{$old}{$new};

        $old =~ s/ENUM.*/ENUM/;

        next if $old eq 'ENUM' and $new eq 'media_type';

        if (! exists $colmap{$old}{$new}) {
            print "Column types for $t.$c do not match: $old does not map to $new\n";
        }
    }
}
## New but not old:
for (sort keys %new) {
    if (!exists $old{$oldfile}{$_} and !exists $ok{NEW}{$_}) {
        print "Not in $oldfile: $_\n";
        next;
    }
}


} ## end each file to be parsed


sub check_valid_sql {

    ## Check for a few common problems in most php files

    my $olddir = getcwd();
    chdir("../..");
    for my $basedir (qw/includes extensions/) {
        scan_dir($basedir);
    }
    chdir $olddir;

    return;

} ## end of check_valid_sql


sub scan_dir {

    my $dir = shift;

    opendir my $dh, $dir or die qq{Could not opendir $dir: $!\n};
    #print "Scanning $dir...\n";
    for my $file (grep { -f "$dir/$_" and /\.php$/ } readdir $dh) {
        find_problems("$dir/$file");
    }
    rewinddir $dh;
    for my $subdir (grep { -d "$dir/$_" and ! /\./ } readdir $dh) {
        scan_dir("$dir/$subdir");
    }
    closedir $dh or die qq{Closedir failed: $!\n};
    return;

} ## end of scan_dir

sub find_problems {

    my $file = shift;
    open my $fh, '<', $file or die qq{Could not open "$file": $!\n};
    my $lastline = '';
    my $inarray = 0;
    while (<$fh>) {
        if (/FORCE INDEX/ and $file !~ /Database\w*\.php/) {
            warn "Found FORCE INDEX string at line $. of $file\n";
        }
        if (/REPLACE INTO/ and $file !~ /Database\w*\.php/) {
            warn "Found REPLACE INTO string at line $. of $file\n";
        }
        if (/\bIF\s*\(/ and $file !~ /DatabaseMySQL\.php/) {
            warn "Found IF string at line $. of $file\n";
        }
        if (/\bCONCAT\b/ and $file !~ /Database\w*\.php/) {
            warn "Found CONCAT string at line $. of $file\n";
        }
        if (/\bGROUP\s+BY\s*\d\b/i and $file !~ /Database\w*\.php/) {
            warn "Found GROUP BY # at line $. of $file\n";
        }
        if (/wfGetDB\s*\(\s+\)/io) {
            warn "wfGETDB is missing parameters at line $. of $file\n";
        }
        if (/=\s*array\s*\(\s*$/) {
            $inarray = 1;
            next;
        }
        if ($inarray) {
            if (/\s*\);\s*$/) {
                $inarray = 0;
                next;
            }
            next if ! /\w/ or /array\(\s*$/ or /^\s*#/ or m{^\s*//};
            if (! /,/) {
                my $nextline = <$fh>;
                last if ! defined $nextline;
                if ($nextline =~ /^\s*\)[;,]/) {
                    $inarray = 0;
                    next;
                }
                #warn "Array is missing a comma? Line $. of $file\n";
            }
        }
    }
    close $fh or die qq{Could not close "$file": $!\n};
    return;

} ## end of find_problems


__DATA__
## Known exceptions
OLD: searchindex ## We use tsearch2 directly on the page table instead
RENAME: user mwuser ## Reserved word causing lots of problems
maintenance/postgres/mysql2postgres.pl
@@ -1,436 +0,0 @@
#!/usr/bin/perl

## Convert data from a MySQL mediawiki database into a Postgres mediawiki database

## NOTE: It is probably easier to dump your wiki using maintenance/dumpBackup.php
## and then import it with maintenance/importDump.php

## If having UTF-8 problems, there are reports that adding --compatible=postgresql
## may help.

use strict;
use warnings;
use Data::Dumper;
use Getopt::Long;

use vars qw(%table %tz %special @torder $COM);
my $VERSION = '1.2';

## The following options can be changed via command line arguments:
my $MYSQLDB = '';
my $MYSQLUSER = '';

## If the following are zero-length, we omit their arguments entirely:
my $MYSQLHOST = '';
my $MYSQLPASSWORD = '';
my $MYSQLSOCKET = '';

## Name of the dump file created
my $MYSQLDUMPFILE = 'mediawiki_upgrade.pg';

## How verbose should this script be (0, 1, or 2)
my $verbose = 0;

my $help = 0;

my $USAGE = "
Usage: $0 --db=<dbname> --user=<user> [OPTION]...
Example: $0 --db=wikidb --user=wikiuser --pass=sushi

Converts a MediaWiki schema from MySQL to Postgres
Options:
  db       Name of the MySQL database
  user     MySQL database username
  pass     MySQL database password
  host     MySQL database host
  socket   MySQL database socket
  verbose  Verbosity, increases with multiple uses
";

GetOptions
    (
    'db=s'     => \$MYSQLDB,
    'user=s'   => \$MYSQLUSER,
    'pass=s'   => \$MYSQLPASSWORD,
    'host=s'   => \$MYSQLHOST,
    'socket=s' => \$MYSQLSOCKET,
    'verbose+' => \$verbose,
    'help'     => \$help,
    );

die $USAGE
    if ! length $MYSQLDB
    or ! length $MYSQLUSER
    or $help;

## The Postgres schema file: should not be changed
my $PG_SCHEMA = 'tables.sql';

## What version we default to when we can't parse the old schema
my $MW_DEFAULT_VERSION = 110;

## Try and find a working version of mysqldump
$verbose and warn "Locating the mysqldump executable\n";
my @MYSQLDUMP = ('/usr/local/bin/mysqldump', '/usr/bin/mysqldump');
my $MYSQLDUMP;
for my $mytry (@MYSQLDUMP) {
    next if ! -e $mytry;
    -x $mytry or die qq{Not an executable file: "$mytry"\n};
    my $version = qx{$mytry -V};
    $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n};
    $MYSQLDUMP = $mytry;
}
$MYSQLDUMP or die qq{Could not find the mysqldump program\n};

## Flags we use for mysqldump
my @MYSQLDUMPARGS = qw(
    --skip-lock-tables
    --complete-insert
    --skip-extended-insert
    --skip-add-drop-table
    --skip-add-locks
    --skip-disable-keys
    --skip-set-charset
    --skip-comments
    --skip-quote-names
);


$verbose and warn "Checking that mysqldump can handle our flags\n";
## Make sure this version can handle all the flags we want.
## Combine with user dump below
my $MYSQLDUMPARGS = join ' ' => @MYSQLDUMPARGS;
## Argh. Any way to make this work on Win32?
my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1};
if ($version =~ /unknown option/) {
    die qq{Sorry, you need to use a newer version of the mysqldump program than the one at "$MYSQLDUMP"\n};
}

push @MYSQLDUMPARGS, "--user=$MYSQLUSER";
length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD";
length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST";

## Open the dump file to hold the mysqldump output
open my $mdump, '+>', $MYSQLDUMPFILE or die qq{Could not open "$MYSQLDUMPFILE": $!\n};
print qq{Writing file "$MYSQLDUMPFILE"\n};

open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, '--no-data', $MYSQLDB;
my $oldselect = select $mdump;

print while <$mfork2>;

## Slurp in the current schema
my $current_schema;
seek $mdump, 0, 0;
{
    local $/;
    $current_schema = <$mdump>;
}
seek $mdump, 0, 0;
truncate $mdump, 0;

warn qq{Trying to determine database version...\n} if $verbose;

my $current_version = 0;
if ($current_schema =~ /CREATE TABLE \S+cur /) {
    $current_version = 103;
}
elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) {
    $current_version = 104;
}
elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) {
    $current_version = 105;
}
elsif ($current_schema !~ /CREATE TABLE \S+validate /) {
    $current_version = 106;
}
elsif ($current_schema !~ /ipb_auto tinyint/) {
    $current_version = 107;
}
elsif ($current_schema !~ /CREATE TABLE \S+profiling /) {
    $current_version = 108;
}
elsif ($current_schema !~ /CREATE TABLE \S+querycachetwo /) {
    $current_version = 109;
}
else {
    $current_version = $MW_DEFAULT_VERSION;
}

if (!$current_version) {
    warn qq{WARNING! Could not figure out the old version, assuming MediaWiki $MW_DEFAULT_VERSION\n};
    $current_version = $MW_DEFAULT_VERSION;
}

## Check for a table prefix:
my $table_prefix = '';
if ($current_schema =~ /CREATE TABLE (\S+)querycache /) {
    $table_prefix = $1;
}

warn qq{Old schema is from MediaWiki version $current_version\n} if $verbose;
warn qq{Table prefix is "$table_prefix"\n} if $verbose and length $table_prefix;

$verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n};
my $now = scalar localtime;
my $conninfo = '';
$MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST";
$MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET";

print qq{
-- Dump of MySQL Mediawiki tables for import into a Postgres Mediawiki schema
-- Performed by the program: $0
-- Version: $VERSION
-- Author: Greg Sabino Mullane <greg\@turnstep.com> Comments welcome
--
-- This file was created: $now
-- Executable used: $MYSQLDUMP
-- Connection information:
-- database: $MYSQLDB
-- user: $MYSQLUSER$conninfo

-- This file can be imported manually with psql like so:
-- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename
-- This will overwrite any existing MediaWiki information, so be careful

};

## psql specific stuff
print q{
\\set ON_ERROR_STOP
BEGIN;
SET client_min_messages = 'WARNING';
SET timezone = 'GMT';
SET DateStyle = 'ISO, YMD';
};

warn qq{Reading in the Postgres schema information\n} if $verbose;
open my $schema, '<', $PG_SCHEMA
    or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n};
my $t;
while (<$schema>) {
    if (/CREATE TABLE\s+(\S+)/) {
        $t = $1;
        $table{$t}={};
        $verbose > 1 and warn qq{ Found table $t\n};
    }
    elsif (/^ +(\w+)\s+TIMESTAMP/) {
        $tz{$t}{$1}++;
        $verbose > 1 and warn qq{ Got a timestamp for column $1\n};
    }
    elsif (/REFERENCES\s*([^( ]+)/) {
        my $ref = $1;
        exists $table{$ref} or die qq{No parent table $ref found for $t\n};
        $table{$t}{$ref}++;
    }
}
close $schema or die qq{Could not close "$PG_SCHEMA": $!\n};

## Read in special cases and table/version information
$verbose and warn qq{Reading in schema exception information\n};
my %version_tables;
while (<DATA>) {
    if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) {
        my $list = join '|' => split /\s+/ => $2;
        $version_tables{$1} = qr{\b$list\b};
        next;
    }
    next unless /^(\w+)\s*(.*)/;
    $special{$1} = $2||'';
    $special{$2} = $1 if length $2;
}

## Determine the order of tables based on foreign key constraints
$verbose and warn qq{Figuring out order of tables to dump\n};
my %dumped;
my $bail = 0;
{
    my $found=0;
    T: for my $t (sort keys %table) {
        next if exists $dumped{$t} and $dumped{$t} >= 1;
        $found=1;
        for my $dep (sort keys %{$table{$t}}) {
            next T if ! exists $dumped{$dep} or $dumped{$dep} < 0;
        }
        $dumped{$t} = -1 if ! exists $dumped{$t};
        ## Skip certain tables that are not imported
        next if exists $special{$t} and !$special{$t};
        push @torder, $special{$t} || $t;
    }
    last if !$found;
    push @torder, '---';
    for (values %dumped) { $_+=2; }
    die "Too many loops!\n" if $bail++ > 1000;
    redo;
}

## Prepare the Postgres database for the move
$verbose and warn qq{Writing Postgres transformation information\n};

print "\n-- Empty out all existing tables\n";
$verbose and warn qq{Writing truncates to empty existing tables\n};


for my $t (@torder, 'objectcache', 'querycache') {
    next if $t eq '---';
    my $tname = $special{$t}||$t;
    printf qq{TRUNCATE TABLE %-20s CASCADE;\n}, qq{"$tname"};
}
print "\n\n";

print qq{-- Allow rc_ip to contain empty string, will convert at end\n};
print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n};

print "-- Changing all timestamp fields to handle raw integers\n";
for my $t (sort keys %tz) {
    next if $t eq 'archive2';
    for my $c (sort keys %{$tz{$t}}) {
        printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c;
    }
}
print "\n";

print q{
INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10);
};

## If we have a table _prefix, we need to temporarily rename all of our Postgres
## tables temporarily for the import. Perhaps consider making this an auto-schema
## thing in the future.
if (length $table_prefix) {
    print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n};
    for my $t (@torder) {
        next if $t eq '---' or $t eq 'text' or $t eq 'user';
        my $tname = $special{$t}||$t;
        printf qq{ALTER TABLE %-18s RENAME TO "${table_prefix}$tname";\n}, qq{"$tname"};
    }
}


## Try and dump the ill-named "user" table:
## We do this table alone because "user" is a reserved word.
print q{

SET escape_string_warning TO 'off';
\\o /dev/null

-- Postgres uses a table name of "mwuser" instead of "user"

-- Create a dummy user to satisfy fk contraints especially with revisions
SELECT setval('user_user_id_seq',0,'false');
INSERT INTO mwuser
VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());

};

push @MYSQLDUMPARGS, '--no-create-info';

$verbose and warn qq{Dumping "user" table\n};
$verbose > 2 and warn Dumper \@MYSQLDUMPARGS;
my $usertable = "${table_prefix}user";
open my $mfork, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable;
## Unfortunately, there is no easy way to catch errors
my $numusers = 0;
while (<$mfork>) {
    ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/;
}
close $mfork;
if ($numusers < 1) {
    warn qq{No users found, probably a connection error.\n};
    print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n};
    close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n};
    exit;
}
print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n";

warn qq{Dumping all other tables from the MySQL schema\n} if $verbose;

## Dump the rest of the tables, in chunks based on constraints
## We do not need the user table:
my @dumplist = grep { $_ ne 'user'} @torder;
my @alist;
{
    undef @alist;
    PICKATABLE: {
        my $tname = shift @dumplist;
        ## XXX Make this dynamic below
        for my $ver (sort {$b <=> $a } keys %version_tables) {
            redo PICKATABLE if $tname =~ $version_tables{$ver};
        }
        $tname = "${table_prefix}$tname" if length $table_prefix;
        next if $tname !~ /^\w/;
        push @alist, $tname;
        $verbose and warn " $tname...\n";
        pop @alist and last if index($alist[-1],'---') >= 0;
        redo if @dumplist;
    }

    ## Dump everything else
    open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist;
    print while <$mfork2>;
    close $mfork2;
    warn qq{Finished dumping from MySQL\n} if $verbose;

    redo if @dumplist;
}

warn qq{Writing information to return Postgres database to normal\n} if $verbose;
print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n};
print qq{ CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n};

## Return tables to their original names if a table prefix was used.
if (length $table_prefix) {
    print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n};
    my $maxsize = 18;
    for (@torder) {
        $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize;
    }
    for my $t (@torder) {
        next if $t eq '---' or $t eq 'text' or $t eq 'user';
        my $tname = $special{$t}||$t;
        printf qq{ALTER TABLE %*s RENAME TO "$tname";\n}, $maxsize+1, qq{"${table_prefix}$tname"};
    }
}

print qq{\n\n--Returning timestamps to normal\n};
for my $t (sort keys %tz) {
    next if $t eq 'archive2';
    for my $c (sort keys %{$tz{$t}}) {
        printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n".
            " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c;
    }
}

## Reset sequences
print q{
SELECT setval('filearchive_fa_id_seq', 1+coalesce(max(fa_id) ,0),false) FROM filearchive;
SELECT setval('ipblocks_ipb_id_seq', 1+coalesce(max(ipb_id) ,0),false) FROM ipblocks;
SELECT setval('job_job_id_seq', 1+coalesce(max(job_id) ,0),false) FROM job;
SELECT setval('logging_log_id_seq', 1+coalesce(max(log_id) ,0),false) FROM logging;
SELECT setval('page_page_id_seq', 1+coalesce(max(page_id),0),false) FROM page;
SELECT setval('page_restrictions_pr_id_seq', 1+coalesce(max(pr_id) ,0),false) FROM page_restrictions;
SELECT setval('recentchanges_rc_id_seq', 1+coalesce(max(rc_id) ,0),false) FROM recentchanges;
SELECT setval('revision_rev_id_seq', 1+coalesce(max(rev_id) ,0),false) FROM revision;
SELECT setval('text_old_id_seq', 1+coalesce(max(old_id) ,0),false) FROM "text";
SELECT setval('user_user_id_seq', 1+coalesce(max(user_id),0),false) FROM mwuser;
};

print "COMMIT;\n\\o\n\n-- End of dump\n\n";
select $oldselect;
close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n};
exit;


__DATA__
## Known remappings: either indicate the MySQL name,
## or leave blank if it should be skipped
mwuser user
archive2
profiling
objectcache

## Which tables to ignore depending on the version
VERSION 1.6: externallinks job templatelinks transcache
VERSION 1.7: filearchive langlinks querycache_info
VERSION 1.9: querycachetwo page_restrictions redirect
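For reference, the removed converter was driven entirely by command-line options (see its $USAGE text above), and the generated file was then loaded with psql as described in the dump's own header comments. A historical invocation, with illustrative connection values:

    cd maintenance/postgres
    perl mysql2postgres.pl --db=wikidb --user=wikiuser --pass=sushi
    # writes mediawiki_upgrade.pg, which the dump header says to import with e.g.:
    psql -p 5432 -h localhost -U wikiuser -f mediawiki_upgrade.pg wikidb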