diff --git a/docs/databases/postgres.txt b/docs/databases/postgres.txt
index c47a077b2e3..8427eaa4590 100644
--- a/docs/databases/postgres.txt
+++ b/docs/databases/postgres.txt
@@ -74,16 +74,6 @@ referenced directly by the code (unlike sequence names).
 Most of the indexes in the file as of this writing are there
 due to production testing of expensive queries on a busy wiki.
 
-== Keeping in sync with tables.sql ==
-
-The script maintenance/postgres/compare_schemas.pl should be
-periodically run. It will parse both "tables.sql" files and
-produce any differences found. Such differences should be fixed
-or exceptions specifically carved out by editing the script
-itself. This script has also been very useful in finding problems
-in maintenance/tables.sql itself, as it is very strict in the
-format it expects things to be in. :)
-
 == MySQL differences ==
 
 The major differences between MySQL and Postgres are represented as
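The documentation section removed above described compare_schemas.pl, which is itself deleted below. For anyone who still wants a quick manual cross-check of the two "tables.sql" files, the following is a minimal, hypothetical Perl sketch of the core idea only: it compares column names, not types, does not carry the deleted script's exception list (so lines such as PRIMARY KEY will show up as false positives), and the relative paths are assumptions carried over from the deleted script.

#!/usr/bin/perl
# Rough sketch of the cross-check that compare_schemas.pl automated.
# Assumes columns appear as "  name TYPE ..." inside "CREATE TABLE ... (".
use strict;
use warnings;

sub columns_of {
    my ($file) = @_;
    my %cols;
    my $table = '';
    open my $fh, '<', $file or die "Could not open $file: $!\n";
    while (<$fh>) {
        $table = $1 if /CREATE TABLE\s+\W*(\w+)/i;   # tolerate /*_*/ prefixes
        $cols{"$table.$1"} = 1 if $table and /^\s+(\w+)\s+\w/;
    }
    close $fh;
    return \%cols;
}

my $mysql = columns_of('../tables.sql');   # MySQL schema (assumed path)
my $pg    = columns_of('tables.sql');      # Postgres schema (assumed path)
for my $c (sort keys %$mysql) {
    print "Only in MySQL schema: $c\n" unless exists $pg->{$c};
}
for my $c (sort keys %$pg) {
    print "Only in Postgres schema: $c\n" unless exists $mysql->{$c};
}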
diff --git a/maintenance/postgres/compare_schemas.pl b/maintenance/postgres/compare_schemas.pl
deleted file mode 100755
index 50b93486a66..00000000000
--- a/maintenance/postgres/compare_schemas.pl
+++ /dev/null
@@ -1,565 +0,0 @@
-#!/usr/bin/perl
-
-## Rough check that the base and postgres "tables.sql" are in sync
-## Should be run from maintenance/postgres
-## Checks a few other things as well...
-
-use strict;
-use warnings;
-use Data::Dumper;
-use Cwd;
-
-#check_valid_sql();
-
-my @old = ('../tables.sql');
-my $new = 'tables.sql';
-my @xfile;
-
-## Read in exceptions and other metadata
-my %ok;
-while (<DATA>) {
-    next unless /^(\w+)\s*:\s*([^#]+)/;
-    my ($name,$val) = ($1,$2);
-    chomp $val;
-    if ($name eq 'RENAME') {
-        die "Invalid rename\n" unless $val =~ /(\w+)\s+(\w+)/;
-        $ok{OLD}{$1} = $2;
-        $ok{NEW}{$2} = $1;
-        next;
-    }
-    if ($name eq 'XFILE') {
-        push @xfile, $val;
-        next;
-    }
-    for (split /\s+/ => $val) {
-        $ok{$name}{$_} = 0;
-    }
-}
-
-my $datatype = join '|' => qw(
-bool
-tinyint smallint int bigint real float
-tinytext mediumtext text char varchar varbinary binary
-timestamp datetime
-tinyblob mediumblob blob
-);
-$datatype .= q{|ENUM\([\"\w\', ]+\)};
-$datatype = qr{($datatype)};
-
-my $typeval = qr{(\(\d+\))?};
-
-my $typeval2 = qr{ signed| unsigned| binary| NOT NULL| NULL| PRIMARY KEY| AUTO_INCREMENT| default ['\-\d\w"]+| REFERENCES .+CASCADE};
-
-my $indextype = join '|' => qw(INDEX KEY FULLTEXT), 'PRIMARY KEY', 'UNIQUE INDEX', 'UNIQUE KEY';
-$indextype = qr{$indextype};
-
-my $engine = qr{TYPE|ENGINE};
-
-my $tabletype = qr{InnoDB|MyISAM|HEAP|HEAP MAX_ROWS=\d+|InnoDB MAX_ROWS=\d+ AVG_ROW_LENGTH=\d+};
-
-my $charset = qr{utf8|binary};
-
-open my $newfh, '<', $new or die qq{Could not open $new: $!\n};
-
-
-my ($table,%old);
-
-## Read in the xfiles
-my %xinfo;
-for my $xfile (@xfile) {
-    print "Loading $xfile\n";
-    my $info = parse_sql($xfile);
-    for (keys %$info) {
-        $xinfo{$_} = $info->{$_};
-    }
-}
-
-for my $oldfile (@old) {
-    print "Loading $oldfile\n";
-    my $info = parse_sql($oldfile);
-    for (keys %xinfo) {
-        $info->{$_} = $xinfo{$_};
-    }
-    $old{$oldfile} = $info;
-}
-
-sub parse_sql {
-
-    my $oldfile = shift;
-
-    open my $oldfh, '<', $oldfile or die qq{Could not open $oldfile: $!\n};
-
-    my %info;
-    while (<$oldfh>) {
-        next if /^\s*\-\-/ or /^\s+$/;
-        s/\s*\-\- [\w ]+$//;
-        chomp;
-
-        if (/CREATE\s*TABLE/i) {
-            if (m{^CREATE TABLE /\*_\*/(\w+) \($}) {
-                $table = $1;
-            }
-            elsif (m{^CREATE TABLE /\*\$wgDBprefix\*/(\w+) \($}) {
-                $table = $1;
-            }
-            else {
-                die qq{Invalid CREATE TABLE at line $. of $oldfile\n};
-            }
-            $info{$table}{name}=$table;
-        }
-        elsif (m{^\) /\*\$wgDBTableOptions\*/}) {
-            $info{$table}{engine} = 'ENGINE';
-            $info{$table}{type} = 'variable';
-        }
-        elsif (/^\) ($engine)=($tabletype);$/) {
-            $info{$table}{engine}=$1;
-            $info{$table}{type}=$2;
-        }
-        elsif (/^\) ($engine)=($tabletype), DEFAULT CHARSET=($charset);$/) {
-            $info{$table}{engine}=$1;
-            $info{$table}{type}=$2;
-            $info{$table}{charset}=$3;
-        }
-        elsif (/^ (\w+) $datatype$typeval$typeval2{0,4},?$/) {
-            $info{$table}{column}{$1} = $2;
-            my $extra = $3 || '';
-            $info{$table}{columnfull}{$1} = "$2$extra";
-        }
-        elsif (m{^ UNIQUE KEY (\w+) \((.+?)\)}) {
-        }
-        elsif (m{^CREATE (?:UNIQUE )?(?:FULLTEXT )?INDEX /\*i\*/(\w+) ON /\*_\*/(\w+) \((.+?)\);}) {
-        }
-        elsif (m{^\s*PRIMARY KEY \([\w,]+\)}) {
-        }
-        else {
-            die "Cannot parse line $. of $oldfile:\n$_\n";
-        }
-
-    }
-    close $oldfh or die qq{Could not close "$oldfile": $!\n};
-
-    return \%info;
-
-} ## end of parse_sql
-
-for my $oldfile (@old) {
-
-## Begin non-standard indent
-
-## MySQL sanity checks
-for my $table (sort keys %{$old{$oldfile}}) {
-    my $t = $old{$oldfile}{$table};
-    if ($t->{engine} eq 'TYPE') {
-        die "Invalid engine for $oldfile: $t->{engine}\n" unless $t->{name} eq 'profiling';
-    }
-    my $charset = $t->{charset} || '';
-    if ($oldfile !~ /binary/ and $charset eq 'binary') {
-        die "Invalid charset for $oldfile: $charset\n";
-    }
-}
-
-my $dtypelist = join '|' => qw(
-SMALLINT INTEGER BIGINT NUMERIC SERIAL
-TEXT CHAR VARCHAR
-BYTEA
-TIMESTAMPTZ
-CIDR
-);
-my $dtype = qr{($dtypelist)};
-my %new;
-my ($infunction,$inview,$inrule,$lastcomma) = (0,0,0,0);
-my %custom_type;
-seek $newfh, 0, 0;
-while (<$newfh>) {
-    next if /^\s*\-\-/ or /^\s*$/;
-    s/\s*\-\- [\w ']+$//;
-    next if /^BEGIN;/ or /^SET / or /^COMMIT;/;
-    next if /^CREATE SEQUENCE/;
-    next if /^CREATE(?: UNIQUE)? INDEX/;
-    next if /^CREATE FUNCTION/;
-    next if /^CREATE TRIGGER/ or /^ FOR EACH ROW/;
-    next if /^INSERT INTO/ or /^ VALUES \(/;
-    next if /^ALTER TABLE/;
-    next if /^DROP SEQUENCE/;
-    next if /^DROP FUNCTION/;
-
-    if (/^CREATE TYPE (\w+)/) {
-        die "Type $1 declared more than once!\n" if $custom_type{$1}++;
-        $dtype = qr{($dtypelist|$1)};
-        next;
-    }
-
-    chomp;
-
-    if (/^\$mw\$;?$/) {
-        $infunction = $infunction ? 0 : 1;
-        next;
-    }
-    next if $infunction;
-
-    next if /^CREATE VIEW/ and $inview = 1;
-    if ($inview) {
-        /;$/ and $inview = 0;
-        next;
-    }
-
-    next if /^CREATE RULE/ and $inrule = 1;
-    if ($inrule) {
-        /;$/ and $inrule = 0;
-        next;
-    }
-
-    if (/^CREATE TABLE "?(\w+)"? \($/) {
-        $table = $1;
-        $new{$table}{name}=$table;
-        $lastcomma = 1;
-    }
-    elsif (/^\);$/) {
-        if ($lastcomma) {
-            warn "Stray comma before line $.\n";
-        }
-    }
-    elsif (/^ (\w+) +$dtype.*?(,?)(?: --.*)?$/) {
-        $new{$table}{column}{$1} = $2;
-        if (!$lastcomma) {
-            print "Missing comma before line $. of $new\n";
-        }
-        $lastcomma = $3 ? 1 : 0;
-    }
-    elsif (m{^\s*PRIMARY KEY \([\w,]+\)}) {
-        $lastcomma = 0;
-    }
-    else {
-        die "Cannot parse line $. of $new:\n$_\n";
-    }
-}
-
-## Which column types are okay to map from mysql to postgres?
-my $COLMAP = q{
-## INTS:
-tinyint SMALLINT
-int INTEGER SERIAL
-smallint SMALLINT
-bigint BIGINT
-real NUMERIC
-float NUMERIC
-
-## TEXT:
-varchar(15) TEXT
-varchar(32) TEXT
-varchar(70) TEXT
-varchar(255) TEXT
-varchar TEXT
-text TEXT
-tinytext TEXT
-ENUM TEXT
-
-## TIMESTAMPS:
-varbinary(14) TIMESTAMPTZ
-binary(14) TIMESTAMPTZ
-datetime TIMESTAMPTZ
-timestamp TIMESTAMPTZ
-
-## BYTEA:
-mediumblob BYTEA
-
-## OTHER:
-bool SMALLINT # Sigh
-
-};
-## Allow specific exceptions to the above
-my $COLMAPOK = q{
-## User inputted text strings:
-ar_comment tinyblob TEXT
-fa_description tinyblob TEXT
-img_description tinyblob TEXT
-ipb_reason tinyblob TEXT
-log_action varbinary(32) TEXT
-log_type varbinary(32) TEXT
-oi_description tinyblob TEXT
-rev_comment tinyblob TEXT
-rc_log_action varbinary(255) TEXT
-rc_log_type varbinary(255) TEXT
-
-## Simple text-only strings:
-ar_flags tinyblob TEXT
-cf_name varbinary(255) TEXT
-cf_value blob TEXT
-ar_sha1 varbinary(32) TEXT
-cl_collation varbinary(32) TEXT
-cl_sortkey varbinary(230) TEXT
-ct_params blob TEXT
-fa_minor_mime varbinary(100) TEXT
-fa_storage_group varbinary(16) TEXT # Just 'deleted' for now, should stay plain text
-fa_storage_key varbinary(64) TEXT # sha1 plus text extension
-ipb_address tinyblob TEXT # IP address or username
-ipb_range_end tinyblob TEXT # hexadecimal
-ipb_range_start tinyblob TEXT # hexadecimal
-img_minor_mime varbinary(100) TEXT
-lc_lang varbinary(32) TEXT
-lc_value varbinary(32) TEXT
-img_sha1 varbinary(32) TEXT
-iw_wikiid varchar(64) TEXT
-job_cmd varbinary(60) TEXT # Should we limit to 60 as well?
-keyname varbinary(255) TEXT # No tablename prefix (objectcache)
-ll_lang varbinary(20) TEXT # Language code
-lc_value mediumblob TEXT
-log_params blob TEXT # LF separated list of args
-log_type varbinary(10) TEXT
-ls_field varbinary(32) TEXT
-md_deps mediumblob TEXT # JSON
-md_module varbinary(255) TEXT
-md_skin varbinary(32) TEXT
-mr_blob mediumblob TEXT # JSON
-mr_lang varbinary(32) TEXT
-mr_resource varbinary(255) TEXT
-mrl_message varbinary(255) TEXT
-mrl_resource varbinary(255) TEXT
-oi_minor_mime varbinary(100) TEXT
-oi_sha1 varbinary(32) TEXT
-old_flags tinyblob TEXT
-old_text mediumblob TEXT
-pp_propname varbinary(60) TEXT
-pp_value blob TEXT
-page_restrictions tinyblob TEXT # CSV string
-pf_server varchar(30) TEXT
-pr_level varbinary(60) TEXT
-pr_type varbinary(60) TEXT
-pt_create_perm varbinary(60) TEXT
-pt_reason tinyblob TEXT
-qc_type varbinary(32) TEXT
-qcc_type varbinary(32) TEXT
-qci_type varbinary(32) TEXT
-rc_params blob TEXT
-rev_sha1 varbinary(32) TEXT
-rlc_to_blob blob TEXT
-ts_tags blob TEXT
-ufg_group varbinary(32) TEXT
-ug_group varbinary(32) TEXT
-ul_value blob TEXT
-up_property varbinary(255) TEXT
-up_value blob TEXT
-us_sha1 varchar(31) TEXT
-us_source_type varchar(50) TEXT
-us_status varchar(50) TEXT
-user_email_token binary(32) TEXT
-user_ip varbinary(40) TEXT
-user_newpassword tinyblob TEXT
-user_options blob TEXT
-user_password tinyblob TEXT
-user_token binary(32) TEXT
-iwl_prefix varbinary(20) TEXT
-
-## Text URLs:
-el_index blob TEXT
-el_to blob TEXT
-iw_api blob TEXT
-iw_url blob TEXT
-tb_url blob TEXT
-tc_url varbinary(255) TEXT
-
-## Deprecated or not yet used:
-ar_text mediumblob TEXT
-job_params blob TEXT
-log_deleted tinyint INTEGER # Not used yet, but keep it INTEGER for safety
-rc_type tinyint CHAR
-
-## Number tweaking:
-fa_bits int SMALLINT # bits per pixel
-fa_height int SMALLINT
-fa_width int SMALLINT # Hope we don't see an image this wide...
-hc_id int BIGINT # Odd that site_stats is all bigint...
-img_bits int SMALLINT # bits per image should stay sane
-oi_bits int SMALLINT
-
-## True binary fields, usually due to gzdeflate and/or serialize:
-math_inputhash varbinary(16) BYTEA
-math_outputhash varbinary(16) BYTEA
-
-## Namespaces: not need for such a high range
-ar_namespace int SMALLINT
-job_namespace int SMALLINT
-log_namespace int SMALLINT
-page_namespace int SMALLINT
-pl_namespace int SMALLINT
-pt_namespace int SMALLINT
-qc_namespace int SMALLINT
-rc_namespace int SMALLINT
-rd_namespace int SMALLINT
-rlc_to_namespace int SMALLINT
-tl_namespace int SMALLINT
-wl_namespace int SMALLINT
-
-## Easy enough to change if a wiki ever does grow this big:
-ss_active_users bigint INTEGER
-ss_good_articles bigint INTEGER
-ss_total_edits bigint INTEGER
-ss_total_pages bigint INTEGER
-ss_users bigint INTEGER
-
-## True IP - keep an eye on these, coders tend to make textual assumptions
-rc_ip varbinary(40) CIDR # Want to keep an eye on this
-
-## Others:
-tc_time int TIMESTAMPTZ
-
-
-};
-
-my %colmap;
-for (split /\n/ => $COLMAP) {
-    next unless /^\w/;
-    s/(.*?)#.*/$1/;
-    my ($col,@maps) = split / +/, $_;
-    for (@maps) {
-        $colmap{$col}{$_} = 1;
-    }
-}
-
-my %colmapok;
-for (split /\n/ => $COLMAPOK) {
-    next unless /^\w/;
-    my ($col,$old,$new) = split / +/, $_;
-    $colmapok{$col}{$old}{$new} = 1;
-}
-
-## Old but not new
-for my $t (sort keys %{$old{$oldfile}}) {
-    if (!exists $new{$t} and !exists $ok{OLD}{$t}) {
-        print "Table not in $new: $t\n";
-        next;
-    }
-    next if exists $ok{OLD}{$t} and !$ok{OLD}{$t};
-    my $newt = exists $ok{OLD}{$t} ? $ok{OLD}{$t} : $t;
-    my $oldcol = $old{$oldfile}{$t}{column};
-    my $oldcolfull = $old{$oldfile}{$t}{columnfull};
-    my $newcol = $new{$newt}{column};
-    for my $c (keys %$oldcol) {
-        if (!exists $newcol->{$c}) {
-            print "Column $t.$c not in $new\n";
-            next;
-        }
-    }
-    for my $c (sort keys %$newcol) {
-        if (!exists $oldcol->{$c}) {
-            print "Column $t.$c not in $oldfile\n";
-            next;
-        }
-        ## Column types (roughly) match up?
-        my $new = $newcol->{$c};
-        my $old = $oldcolfull->{$c};
-
-        ## Known exceptions:
-        next if exists $colmapok{$c}{$old}{$new};
-
-        $old =~ s/ENUM.*/ENUM/;
-
-        next if $old eq 'ENUM' and $new eq 'media_type';
-
-        if (! exists $colmap{$old}{$new}) {
-            print "Column types for $t.$c do not match: $old does not map to $new\n";
-        }
-    }
-}
-## New but not old:
-for (sort keys %new) {
-    if (!exists $old{$oldfile}{$_} and !exists $ok{NEW}{$_}) {
-        print "Not in $oldfile: $_\n";
-        next;
-    }
-}
-
-
-} ## end each file to be parsed
-
-
-sub check_valid_sql {
-
-    ## Check for a few common problems in most php files
-
-    my $olddir = getcwd();
-    chdir("../..");
-    for my $basedir (qw/includes extensions/) {
-        scan_dir($basedir);
-    }
-    chdir $olddir;
-
-    return;
-
-} ## end of check_valid_sql
-
-
-sub scan_dir {
-
-    my $dir = shift;
-
-    opendir my $dh, $dir or die qq{Could not opendir $dir: $!\n};
-    #print "Scanning $dir...\n";
-    for my $file (grep { -f "$dir/$_" and /\.php$/ } readdir $dh) {
-        find_problems("$dir/$file");
-    }
-    rewinddir $dh;
-    for my $subdir (grep { -d "$dir/$_" and ! /\./ } readdir $dh) {
-        scan_dir("$dir/$subdir");
-    }
-    closedir $dh or die qq{Closedir failed: $!\n};
-    return;
-
-} ## end of scan_dir
-
-sub find_problems {
-
-    my $file = shift;
-    open my $fh, '<', $file or die qq{Could not open "$file": $!\n};
-    my $lastline = '';
-    my $inarray = 0;
-    while (<$fh>) {
-        if (/FORCE INDEX/ and $file !~ /Database\w*\.php/) {
-            warn "Found FORCE INDEX string at line $. of $file\n";
-        }
-        if (/REPLACE INTO/ and $file !~ /Database\w*\.php/) {
-            warn "Found REPLACE INTO string at line $. of $file\n";
-        }
-        if (/\bIF\s*\(/ and $file !~ /DatabaseMySQL\.php/) {
-            warn "Found IF string at line $. of $file\n";
-        }
-        if (/\bCONCAT\b/ and $file !~ /Database\w*\.php/) {
-            warn "Found CONCAT string at line $. of $file\n";
-        }
-        if (/\bGROUP\s+BY\s*\d\b/i and $file !~ /Database\w*\.php/) {
-            warn "Found GROUP BY # at line $. of $file\n";
-        }
-        if (/wfGetDB\s*\(\s+\)/io) {
-            warn "wfGETDB is missing parameters at line $. of $file\n";
-        }
-        if (/=\s*array\s*\(\s*$/) {
-            $inarray = 1;
-            next;
-        }
-        if ($inarray) {
-            if (/\s*\);\s*$/) {
-                $inarray = 0;
-                next;
-            }
-            next if ! /\w/ or /array\(\s*$/ or /^\s*#/ or m{^\s*//};
-            if (! /,/) {
-                my $nextline = <$fh>;
-                last if ! defined $nextline;
-                if ($nextline =~ /^\s*\)[;,]/) {
-                    $inarray = 0;
-                    next;
-                }
-                #warn "Array is missing a comma? Line $. of $file\n";
-            }
-        }
-    }
-    close $fh or die qq{Could not close "$file": $!\n};
-    return;
-
-} ## end of find_problems
-
-
-__DATA__
-## Known exceptions
-OLD: searchindex ## We use tsearch2 directly on the page table instead
-RENAME: user mwuser ## Reserved word causing lots of problems
-    -x $mytry or die qq{Not an executable file: "$mytry"\n};
-    my $version = qx{$mytry -V};
-    $version =~ /^mysqldump\s+Ver\s+\d+/ or die qq{Program at "$mytry" does not act like mysqldump\n};
-    $MYSQLDUMP = $mytry;
-}
-$MYSQLDUMP or die qq{Could not find the mysqldump program\n};
-
-## Flags we use for mysqldump
-my @MYSQLDUMPARGS = qw(
---skip-lock-tables
---complete-insert
---skip-extended-insert
---skip-add-drop-table
---skip-add-locks
---skip-disable-keys
---skip-set-charset
---skip-comments
---skip-quote-names
-);
-
-
-$verbose and warn "Checking that mysqldump can handle our flags\n";
-## Make sure this version can handle all the flags we want.
-## Combine with user dump below
-my $MYSQLDUMPARGS = join ' ' => @MYSQLDUMPARGS;
-## Argh. Any way to make this work on Win32?
-my $version = qx{$MYSQLDUMP $MYSQLDUMPARGS 2>&1};
-if ($version =~ /unknown option/) {
-    die qq{Sorry, you need to use a newer version of the mysqldump program than the one at "$MYSQLDUMP"\n};
-}
-
-push @MYSQLDUMPARGS, "--user=$MYSQLUSER";
-length $MYSQLPASSWORD and push @MYSQLDUMPARGS, "--password=$MYSQLPASSWORD";
-length $MYSQLHOST and push @MYSQLDUMPARGS, "--host=$MYSQLHOST";
-
-## Open the dump file to hold the mysqldump output
-open my $mdump, '+>', $MYSQLDUMPFILE or die qq{Could not open "$MYSQLDUMPFILE": $!\n};
-print qq{Writing file "$MYSQLDUMPFILE"\n};
-
-open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, '--no-data', $MYSQLDB;
-my $oldselect = select $mdump;
-
-print while <$mfork2>;
-
-## Slurp in the current schema
-my $current_schema;
-seek $mdump, 0, 0;
-{
-    local $/;
-    $current_schema = <$mdump>;
-}
-seek $mdump, 0, 0;
-truncate $mdump, 0;
-
-warn qq{Trying to determine database version...\n} if $verbose;
-
-my $current_version = 0;
-if ($current_schema =~ /CREATE TABLE \S+cur /) {
-    $current_version = 103;
-}
-elsif ($current_schema =~ /CREATE TABLE \S+brokenlinks /) {
-    $current_version = 104;
-}
-elsif ($current_schema !~ /CREATE TABLE \S+templatelinks /) {
-    $current_version = 105;
-}
-elsif ($current_schema !~ /CREATE TABLE \S+validate /) {
-    $current_version = 106;
-}
-elsif ($current_schema !~ /ipb_auto tinyint/) {
-    $current_version = 107;
-}
-elsif ($current_schema !~ /CREATE TABLE \S+profiling /) {
-    $current_version = 108;
-}
-elsif ($current_schema !~ /CREATE TABLE \S+querycachetwo /) {
-    $current_version = 109;
-}
-else {
-    $current_version = $MW_DEFAULT_VERSION;
-}
-
-if (!$current_version) {
-    warn qq{WARNING! Could not figure out the old version, assuming MediaWiki $MW_DEFAULT_VERSION\n};
-    $current_version = $MW_DEFAULT_VERSION;
-}
-
-## Check for a table prefix:
-my $table_prefix = '';
-if ($current_schema =~ /CREATE TABLE (\S+)querycache /) {
-    $table_prefix = $1;
-}
-
-warn qq{Old schema is from MediaWiki version $current_version\n} if $verbose;
-warn qq{Table prefix is "$table_prefix"\n} if $verbose and length $table_prefix;
-
-$verbose and warn qq{Writing file "$MYSQLDUMPFILE"\n};
-my $now = scalar localtime;
-my $conninfo = '';
-$MYSQLHOST and $conninfo .= "\n-- host $MYSQLHOST";
-$MYSQLSOCKET and $conninfo .= "\n-- socket $MYSQLSOCKET";
-
-print qq{
--- Dump of MySQL Mediawiki tables for import into a Postgres Mediawiki schema
--- Performed by the program: $0
--- Version: $VERSION
--- Author: Greg Sabino Mullane  Comments welcome
---
--- This file was created: $now
--- Executable used: $MYSQLDUMP
--- Connection information:
--- database: $MYSQLDB
--- user: $MYSQLUSER$conninfo
-
--- This file can be imported manually with psql like so:
--- psql -p port# -h hostname -U username -f $MYSQLDUMPFILE databasename
--- This will overwrite any existing MediaWiki information, so be careful
-};
-
-## psql specific stuff
-print q{
-\\set ON_ERROR_STOP
-BEGIN;
-SET client_min_messages = 'WARNING';
-SET timezone = 'GMT';
-SET DateStyle = 'ISO, YMD';
-};
-
-warn qq{Reading in the Postgres schema information\n} if $verbose;
-open my $schema, '<', $PG_SCHEMA
-    or die qq{Could not open "$PG_SCHEMA": make sure this script is run from maintenance/postgres/\n};
-my $t;
-while (<$schema>) {
-    if (/CREATE TABLE\s+(\S+)/) {
-        $t = $1;
-        $table{$t}={};
-        $verbose > 1 and warn qq{ Found table $t\n};
-    }
-    elsif (/^ +(\w+)\s+TIMESTAMP/) {
-        $tz{$t}{$1}++;
-        $verbose > 1 and warn qq{ Got a timestamp for column $1\n};
-    }
-    elsif (/REFERENCES\s*([^( ]+)/) {
-        my $ref = $1;
-        exists $table{$ref} or die qq{No parent table $ref found for $t\n};
-        $table{$t}{$ref}++;
-    }
-}
-close $schema or die qq{Could not close "$PG_SCHEMA": $!\n};
-
-## Read in special cases and table/version information
-$verbose and warn qq{Reading in schema exception information\n};
-my %version_tables;
-while (<DATA>) {
-    if (/^VERSION\s+(\d+\.\d+):\s+(.+)/) {
-        my $list = join '|' => split /\s+/ => $2;
-        $version_tables{$1} = qr{\b$list\b};
-        next;
-    }
-    next unless /^(\w+)\s*(.*)/;
-    $special{$1} = $2||'';
-    $special{$2} = $1 if length $2;
-}
-
-## Determine the order of tables based on foreign key constraints
-$verbose and warn qq{Figuring out order of tables to dump\n};
-my %dumped;
-my $bail = 0;
-{
-    my $found=0;
-    T: for my $t (sort keys %table) {
-        next if exists $dumped{$t} and $dumped{$t} >= 1;
-        $found=1;
-        for my $dep (sort keys %{$table{$t}}) {
-            next T if ! exists $dumped{$dep} or $dumped{$dep} < 0;
-        }
-        $dumped{$t} = -1 if ! exists $dumped{$t};
-        ## Skip certain tables that are not imported
-        next if exists $special{$t} and !$special{$t};
-        push @torder, $special{$t} || $t;
-    }
-    last if !$found;
-    push @torder, '---';
-    for (values %dumped) { $_+=2; }
-    die "Too many loops!\n" if $bail++ > 1000;
-    redo;
-}
-
-## Prepare the Postgres database for the move
-$verbose and warn qq{Writing Postgres transformation information\n};
-
-print "\n-- Empty out all existing tables\n";
-$verbose and warn qq{Writing truncates to empty existing tables\n};
-
-
-for my $t (@torder, 'objectcache', 'querycache') {
-    next if $t eq '---';
-    my $tname = $special{$t}||$t;
-    printf qq{TRUNCATE TABLE %-20s CASCADE;\n}, qq{"$tname"};
-}
-print "\n\n";
-
-print qq{-- Allow rc_ip to contain empty string, will convert at end\n};
-print qq{ALTER TABLE recentchanges ALTER rc_ip TYPE text USING host(rc_ip);\n\n};
-
-print "-- Changing all timestamp fields to handle raw integers\n";
-for my $t (sort keys %tz) {
-    next if $t eq 'archive2';
-    for my $c (sort keys %{$tz{$t}}) {
-        printf "ALTER TABLE %-18s ALTER %-25s TYPE TEXT;\n", $t, $c;
-    }
-}
-print "\n";
-
-print q{
-INSERT INTO page VALUES (0,-1,'Dummy Page','',0,0,0,default,now(),0,10);
-};
-
-## If we have a table _prefix, we need to temporarily rename all of our Postgres
-## tables temporarily for the import. Perhaps consider making this an auto-schema
-## thing in the future.
-if (length $table_prefix) {
-    print qq{\n\n-- Temporarily renaming tables to accomodate the table_prefix "$table_prefix"\n\n};
-    for my $t (@torder) {
-        next if $t eq '---' or $t eq 'text' or $t eq 'user';
-        my $tname = $special{$t}||$t;
-        printf qq{ALTER TABLE %-18s RENAME TO "${table_prefix}$tname";\n}, qq{"$tname"};
-    }
-}
-
-
-## Try and dump the ill-named "user" table:
-## We do this table alone because "user" is a reserved word.
-print q{
-
-SET escape_string_warning TO 'off';
-\\o /dev/null
-
--- Postgres uses a table name of "mwuser" instead of "user"
-
--- Create a dummy user to satisfy fk contraints especially with revisions
-SELECT setval('user_user_id_seq',0,'false');
-INSERT INTO mwuser
-  VALUES (DEFAULT,'Anonymous','',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,now(),now());
-
-};
-
-push @MYSQLDUMPARGS, '--no-create-info';
-
-$verbose and warn qq{Dumping "user" table\n};
-$verbose > 2 and warn Dumper \@MYSQLDUMPARGS;
-my $usertable = "${table_prefix}user";
-open my $mfork, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, $usertable;
-## Unfortunately, there is no easy way to catch errors
-my $numusers = 0;
-while (<$mfork>) {
-    ++$numusers and print if s/INSERT INTO $usertable/INSERT INTO mwuser/;
-}
-close $mfork;
-if ($numusers < 1) {
-    warn qq{No users found, probably a connection error.\n};
-    print qq{ERROR: No users found, connection failed, or table "$usertable" does not exist. Dump aborted.\n};
-    close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n};
-    exit;
-}
-print "\n-- Users loaded: $numusers\n\n-- Loading rest of the mediawiki schema:\n";
-
-warn qq{Dumping all other tables from the MySQL schema\n} if $verbose;
-
-## Dump the rest of the tables, in chunks based on constraints
-## We do not need the user table:
-my @dumplist = grep { $_ ne 'user'} @torder;
-my @alist;
-{
-    undef @alist;
-    PICKATABLE: {
-        my $tname = shift @dumplist;
-        ## XXX Make this dynamic below
-        for my $ver (sort {$b <=> $a } keys %version_tables) {
-            redo PICKATABLE if $tname =~ $version_tables{$ver};
-        }
-        $tname = "${table_prefix}$tname" if length $table_prefix;
-        next if $tname !~ /^\w/;
-        push @alist, $tname;
-        $verbose and warn " $tname...\n";
-        pop @alist and last if index($alist[-1],'---') >= 0;
-        redo if @dumplist;
-    }
-
-    ## Dump everything else
-    open my $mfork2, '-|' or exec $MYSQLDUMP, @MYSQLDUMPARGS, $MYSQLDB, @alist;
-    print while <$mfork2>;
-    close $mfork2;
-    warn qq{Finished dumping from MySQL\n} if $verbose;
-
-    redo if @dumplist;
-}
-
-warn qq{Writing information to return Postgres database to normal\n} if $verbose;
-print qq{ALTER TABLE ${table_prefix}recentchanges ALTER rc_ip TYPE cidr USING\n};
-print qq{  CASE WHEN rc_ip = '' THEN NULL ELSE rc_ip::cidr END;\n};
-
-## Return tables to their original names if a table prefix was used.
-if (length $table_prefix) {
-    print qq{\n\n-- Renaming tables by removing table prefix "$table_prefix"\n\n};
-    my $maxsize = 18;
-    for (@torder) {
-        $maxsize = length "$_$table_prefix" if length "$_$table_prefix" > $maxsize;
-    }
-    for my $t (@torder) {
-        next if $t eq '---' or $t eq 'text' or $t eq 'user';
-        my $tname = $special{$t}||$t;
-        printf qq{ALTER TABLE %*s RENAME TO "$tname";\n}, $maxsize+1, qq{"${table_prefix}$tname"};
-    }
-}
-
-print qq{\n\n--Returning timestamps to normal\n};
-for my $t (sort keys %tz) {
-    next if $t eq 'archive2';
-    for my $c (sort keys %{$tz{$t}}) {
-        printf "ALTER TABLE %-18s ALTER %-25s TYPE timestamptz\n".
-            " USING TO_TIMESTAMP($c,'YYYYMMDDHHMISS');\n", $t, $c;
-    }
-}
-
-## Reset sequences
-print q{
-SELECT setval('filearchive_fa_id_seq', 1+coalesce(max(fa_id) ,0),false) FROM filearchive;
-SELECT setval('ipblocks_ipb_id_seq', 1+coalesce(max(ipb_id) ,0),false) FROM ipblocks;
-SELECT setval('job_job_id_seq', 1+coalesce(max(job_id) ,0),false) FROM job;
-SELECT setval('logging_log_id_seq', 1+coalesce(max(log_id) ,0),false) FROM logging;
-SELECT setval('page_page_id_seq', 1+coalesce(max(page_id),0),false) FROM page;
-SELECT setval('page_restrictions_pr_id_seq', 1+coalesce(max(pr_id) ,0),false) FROM page_restrictions;
-SELECT setval('recentchanges_rc_id_seq', 1+coalesce(max(rc_id) ,0),false) FROM recentchanges;
-SELECT setval('revision_rev_id_seq', 1+coalesce(max(rev_id) ,0),false) FROM revision;
-SELECT setval('text_old_id_seq', 1+coalesce(max(old_id) ,0),false) FROM "text";
-SELECT setval('user_user_id_seq', 1+coalesce(max(user_id),0),false) FROM mwuser;
-};
-
-print "COMMIT;\n\\o\n\n-- End of dump\n\n";
-select $oldselect;
-close $mdump or die qq{Could not close "$MYSQLDUMPFILE": $!\n};
-exit;
-
-
-__DATA__
-## Known remappings: either indicate the MySQL name,
-## or leave blank if it should be skipped
-mwuser user
-archive2
-profiling
-objectcache
-
-## Which tables to ignore depending on the version
-VERSION 1.6: externallinks job templatelinks transcache
-VERSION 1.7: filearchive langlinks querycache_info
-VERSION 1.9: querycachetwo page_restrictions redirect
-
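The subtlest step in the converter deleted above is ordering the tables so that foreign-key parents are emitted before their children; the script swept the dependency map repeatedly, appending a '---' chunk marker after each pass. Below is a minimal sketch of that same repeated-sweep idea, without the chunk markers, version-based skips, or rename handling, and with a hypothetical three-table dependency map standing in for the one the script built from tables.sql.

use strict;
use warnings;

# Hypothetical map: table => { parent tables it references }.
my %deps = (
    page     => {},
    revision => { page => 1 },
    text     => { revision => 1 },
);

my (@order, %done);
while (keys %done < keys %deps) {
    my $progress = 0;
    TABLE: for my $t (sort keys %deps) {
        next if $done{$t};
        for my $parent (keys %{ $deps{$t} }) {
            next TABLE unless $done{$parent};   # a parent not dumped yet
        }
        push @order, $t;                        # safe to dump this table now
        $done{$t} = 1;
        $progress = 1;
    }
    die "Dependency loop detected\n" unless $progress;
}
print "@order\n";   # prints: page revision text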