Commit b9bbad52 authored by Jacob Vosmaer

Integrate mysql-to-postgres conversion

parent bf1e976d
require 'yaml'
require 'open3'
module Backup
class Database
......@@ -17,7 +18,7 @@ module Backup
FileUtils.mkdir_p(@db_dir) unless Dir.exists?(@db_dir)
end
def dump
def dump(mysql_to_postgresql=false)
FileUtils.rm_f(db_file_name)
compress_rd, compress_wr = IO.pipe
compress_pid = spawn(*%W(gzip -c), in: compress_rd, out: [db_file_name, 'w', 0600])
......@@ -26,7 +27,9 @@ module Backup
dump_pid = case config["adapter"]
when /^mysql/ then
$progress.print "Dumping MySQL database #{config['database']} ... "
spawn('mysqldump', *mysql_args, config['database'], *TABLES, out: compress_wr)
args = mysql_args
args << '--compatible=postgresql' if mysql_to_postgresql
spawn('mysqldump', *args, config['database'], *TABLES, out: compress_wr)
when "postgresql" then
$progress.print "Dumping PostgreSQL database #{config['database']} ... "
pg_env
......@@ -38,6 +41,42 @@ module Backup
report_success(success)
abort 'Backup failed' unless success
convert_to_postgresql if mysql_to_postgresql
end
def convert_to_postgresql
mysql_dump_gz = db_file_name + '.mysql'
psql_dump_gz = db_file_name + '.psql'
drop_indexes_sql = File.join(db_dir, 'drop_indexes.sql')
File.rename(db_file_name, mysql_dump_gz)
$progress.print "Converting MySQL database dump to Postgres ... "
statuses = Open3.pipeline(
%W(gzip -cd #{mysql_dump_gz}),
%W(python lib/support/mysql-postgresql-converter/db_converter.py - - #{drop_indexes_sql}),
%W(gzip -c),
out: [psql_dump_gz, 'w', 0600]
)
if !statuses.compact.all?(&:success?)
abort "mysql-to-postgresql-converter failed"
end
$progress.puts '[DONE]'.green
$progress.print "Splicing in 'DROP INDEX' statements ... "
statuses = Open3.pipeline(
%W(lib/support/mysql-postgresql-converter/splice_drop_indexes #{psql_dump_gz} #{drop_indexes_sql}),
%W(gzip -c),
out: [db_file_name, 'w', 0600]
)
if !statuses.compact.all?(&:success?)
abort "Failed to splice in 'DROP INDEXES' statements"
end
$progress.puts '[DONE]'.green
ensure
FileUtils.rm_f([mysql_dump_gz, psql_dump_gz, drop_indexes_sql])
end
def restore
......
......@@ -35,17 +35,14 @@ First, dump your MySQL database in PostgreSQL-compatible format
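The exact dump command is elided above; a dump along the following lines (database name and credentials are placeholders) produces output the converter accepts:
mysqldump --compatible=postgresql --default-character-set=utf8 -u someuser -p databasename > databasename.mysql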
Then, convert it using the db_converter.py script.
python db_converter.py databasename.mysql databasename.psql
python db_converter.py databasename.mysql - drop_indexes.sql | gzip -c > databasename.unfinished.psql.gz
It'll print progress to the terminal.
Now we have a DB dump that can be imported but the dump will be slow due
to existing indexes. We use 'ed' to edit the DB dump file and move the
'DROP INDEX' statements to the start of the import. Ed is not the fastest
tool for this job if your DB dump is multiple gigabytes. (Patches to
the converter are welcome!)
to existing indexes.
ed -s databasename.psql < move_drop_indexes.ed
./splice_drop_indexes databasename.unfinished.psql.gz drop_indexes.sql > databasename.psql
Next, load your new dump into a fresh PostgreSQL database using:
......
......@@ -15,7 +15,7 @@ import time
import subprocess
def parse(input_filename, output_filename):
def parse(input_filename, output_filename, drop_index_filename):
"Feed it a file, and it'll output a fixed one"
# State storage
......@@ -44,6 +44,8 @@ def parse(input_filename, output_filename):
output = open(output_filename, "w")
logging = sys.stdout
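# DROP INDEX statements are collected in a separate file so they can be
# spliced back in ahead of the data import (see splice_drop_indexes).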
drop_index = open(drop_index_filename, "w")
if input_filename == "-":
input_fh = sys.stdin
else:
......@@ -234,12 +236,9 @@ def parse(input_filename, output_filename):
for line in sequence_lines:
output.write("%s;\n" % line)
# This line is an anchor for move_drop_indexes.ed
output.write("\n-- Drop indexes --\n")
drop_index.write("-- Drop indexes --\n")
for line in drop_index_lines:
output.write("%s;\n" % line)
# This line is an anchor for move_drop_indexes.ed
output.write("-- END Drop indexes --\n")
drop_index.write("%s;\n" % line)
# Write indexes out
output.write("\n-- Indexes --\n")
......@@ -253,4 +252,4 @@ def parse(input_filename, output_filename):
if __name__ == "__main__":
parse(sys.argv[1], sys.argv[2])
parse(sys.argv[1], sys.argv[2], sys.argv[3])
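With the new third argument the converter is invoked with three paths ('-' selects stdin/stdout, as in the README example above); for instance, with placeholder file names:
python db_converter.py databasename.mysql databasename.psql drop_indexes.sql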
#!/bin/sh
# This script reorders database dumps generated by db_converter.py for
# efficient consumption by Postgres.
fail() {
echo "$@" 1>2
exit 1
}
db_gz=$1
drop_indexes_sql=$2
if [ -z "$db_gz" ] || [ -z "$drop_indexes_sql" ] ; then
fail "Usage: $0 database.sql.gz drop_indexes.sql"
fi
# Capture all text up to the first occurrence of 'SET CONSTRAINTS'
preamble=$(zcat "$db_gz" | sed '/SET CONSTRAINTS/q')
if [ -z "$preamble" ] ; then
fail "Could not read preamble"
fi
drop_indexes=$(cat "$drop_indexes_sql")
if [ -z "$drop_indexes" ] ; then
fail "Could not read DROP INDEXES file"
fi
# Print preamble and drop indexes
cat <<EOF
${preamble}
${drop_indexes}
EOF
# Print the rest of database.sql.gz. The awk one-liner below prints every line
# after (but not including) the first match of 'SET CONSTRAINTS'.
zcat "$db_gz" | awk 'f; /SET CONSTRAINTS/ { f = 1 }'
......@@ -3,13 +3,14 @@ namespace :backup do
desc "GITLAB | Create a backup of the GitLab CI database"
task create: :environment do
configure_cron_mode
mysql_to_postgresql = (ENV['MYSQL_TO_POSTGRESQL'] == '1')
$progress.puts "Applying final database migrations ... ".blue
Rake::Task['db:migrate'].invoke
$progress.puts "done".green
$progress.puts "Dumping database ... ".blue
Backup::Database.new.dump
Backup::Database.new.dump(mysql_to_postgresql)
$progress.puts "done".green
$progress.puts "Dumping builds ... ".blue
......
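The conversion path is opted into by setting the environment variable when running the backup task; a hypothetical invocation (the exact rake setup depends on the GitLab CI installation) would look like:
MYSQL_TO_POSTGRESQL=1 bundle exec rake backup:create RAILS_ENV=production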