Converting MySQL to Postgresql

Postgres 9.5 brought some great new features. A really nice one is IMPORT FOREIGN SCHEMA.

Postgres has long had the ability to treat other data sources as if they’re part of the database, but setting it up was a bit of a pain, as you had to declare each table you wanted to use manually.

The new command can just be pointed at a foreign data source and it automagically sets up all the tables or chosen ones in moments.

As I had a need to convert a MySQL database to Postgres, I decided to try using the new feature to do it. Connect to MySQL, import the schema, do a bunch of CREATE… SELECT * and you’re done, right?

Turned out to be a little fiddlier, as I wanted to import default values, and to convert tinyints to Booleans, but not too bad. So for anyone curious how it works, or who wants to actaully do such an import, the script is below. It’s a Ruby file that you can just run from Console. All you have to do is to set up the MySQL server you want to import from as source in your database.yml first.

class SourceDB < ActiveRecord::Base
  self.abstract_class = true
  establish_connection :source
end

def q query
  SourceDB.connection.execute query
end

config = SourceDB.connection_config

tbls = q "select table_name from information_schema.tables where table_schema = '#{config[:database]}'"

query = """BEGIN;
DROP SCHEMA public CASCADE; CREATE SCHEMA public;
DROP SCHEMA IF EXISTS mysql; create schema mysql;

create extension mysql_fdw;
create server mysql      FOREIGN DATA WRAPPER mysql_fdw
     OPTIONS (host '#{config[:host]}', port '#{config[:port] || 3306}');
create user mapping for public server mysql options(username '#{config[:username]}', password '#{config[:password]}');
IMPORT foreign schema \"#{config[:database]}\" from server mysql into \"mysql\";
""" +
tbls.map do |tbl|
  tbl = tbl.first
      """
      DO
$do$
BEGIN
CREATE TABLE #{tbl} AS SELECT * FROM mysql.#{tbl};
       IF POSITION('test' in current_database()) <> 0 THEN
       	  TRUNCATE #{tbl};
	  END IF;
	  BEGIN
		CREATE SEQUENCE IF NOT EXISTS #{tbl}_seq;
		       ALTER TABLE #{tbl} ADD PRIMARY KEY(id), ALTER COLUMN id SET DEFAULT NEXTVAL('#{tbl}_seq');
		       EXCEPTION
		       WHEN OTHERS THEN
    RAISE NOTICE 'NOT CREATING SEQUENCE on #{tbl}';
    -- do nothing
    END;
END;
$do$;
END;"""
end * '' + "INSERT INTO SCHEMA_MIGRATIONS SELECT * FROM mysql.schema_migrations;"

bools = q "SELECT table_name, column_name FROM INFORMATION_SCHEMA.columns WHERE data_type = 'tinyint' AND table_schema = '#{config[:database]}'"
bools.each do |table, field|
  query += """
  DO
  $do$
  BEGIN
    ALTER TABLE #{table} DROP COLUMN #{field};
    ALTER TABLE #{table} ADD COLUMN #{field} BOOLEAN;
    UPDATE #{table}
    SET #{field} =
      CASE
        WHEN dest.#{field} = 1 THEN TRUE
        WHEN dest.#{field} = 0 THEN FALSE
      ELSE NULL
      END
    FROM mysql.#{table} dest where #{table}.id = dest.id;
  END;
  $do$;"""
      end
      defaults = q "SELECT table_name, column_name, column_default, data_type FROM INFORMATION_SCHEMA.columns WHERE table_schema = '#{config[:database]}' AND column_default IS NOT NULL"

      defaults.each do |tbl, col, defn, type|
        if type == 'tinyint'
          if defn == '0'
            query += "ALTER TABLE #{tbl} ALTER COLUMN #{col} SET DEFAULT FALSE;"
          elsif defn == '1'
            query += "ALTER TABLE #{tbl} ALTER COLUMN #{col} SET DEFAULT TRUE;"
          end
        else
          query += "ALTER TABLE #{tbl} ALTER COLUMN #{col} SET DEFAULT '#{defn}';"
        end
      end

      query += """
DO
 $do$
 DECLARE
   cmd varchar;
 BEGIN
 FOR cmd IN (SELECT 'ALTER SEQUENCE '|| quote_ident(MIN(schema_name)) ||'.'|| quote_ident(MIN(seq_name))
        ||' OWNED BY '|| quote_ident(MIN(TABLE_NAME)) ||'.'|| quote_ident(MIN(column_name)) ||';'
 FROM (
     SELECT
         n.nspname AS schema_name,
         c.relname AS TABLE_NAME,
         a.attname AS column_name,
         SUBSTRING(d.adsrc FROM E'^nextval\\(''([^'']*)''(?:::text|::regclass)?\\)') AS seq_name
     FROM pg_class c
     JOIN pg_attribute a ON (c.oid=a.attrelid)
     JOIN pg_attrdef d ON (a.attrelid=d.adrelid AND a.attnum=d.adnum)
     JOIN pg_namespace n ON (c.relnamespace=n.oid)
     WHERE has_schema_privilege(n.oid,'USAGE')
       AND n.nspname NOT LIKE 'pg!_%' escape '!'
       AND has_table_privilege(c.oid,'SELECT')
       AND (NOT a.attisdropped)
       AND d.adsrc ~ '^nextval'
 ) seq
 GROUP BY seq_name HAVING COUNT(*)=1)
 LOOP
   EXECUTE cmd;
 END LOOP;
 END;
 $do$;
 END;

 DO
 $do$
 DECLARE
   cmd varchar;
 BEGIN
   FOR cmd IN (
 SELECT 'SELECT SETVAL(' ||
        quote_literal(quote_ident(PGT.schemaname) || '.' || quote_ident(S.relname)) ||
        ', COALESCE(MAX(' ||quote_ident(C.attname)|| '), 1) ) FROM ' ||
        quote_ident(PGT.schemaname)|| '.'||quote_ident(T.relname)|| ';'
 FROM pg_class AS S,
      pg_depend AS D,
      pg_class AS T,
      pg_attribute AS C,
      pg_tables AS PGT
 WHERE S.relkind = 'S'
     AND S.oid = D.objid
     AND D.refobjid = T.oid
     AND D.refobjid = C.attrelid
     AND D.refobjsubid = C.attnum
     AND T.relname = PGT.tablename
 ORDER BY S.relname)
 LOOP
 EXECUTE cmd;
 END LOOP;
 END;
 $do$;
COMMIT;
"""

begin
ActiveRecord::Base.connection.execute query
rescue Exception => e
  p e.inspect
end