Created
April 2, 2009 04:34
-
-
Save penguincoder/89028 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
## | |
# Andrew Coleman <mercury at penguincoder dot org> | |
# Released under the GPLv2 http://gnu.org | |
# General purpose tranlator for pgsql -> mysql. Best used with Rails-like data. | |
# | |
# Use: pg_dump -DOx [dbname] > outputfile | |
# to get a proper dump for this program. | |
# | |
# Allows for CREATE TABLE, INSERT INTO, and CREATE INDEX translations. | |
# Keeps DEFAULT values and strips all Postgres style data casting (::). | |
# Also can interpret multi-line INSERT statements (for those that use newlines | |
# in text fields). The script will also include any columns from INHERIT tables | |
# at the end of the CREATE TABLE clause. | |
# | |
# General Usage: | |
# ./pg2mysql.rb [input files] > mysql-compatible-dump | |
# also: | |
# pg_dump -DOx [dbname] | ./pg2mysql.rb > mysql-compatible-dump | |
# | |
# Assumptions: | |
# * field 'id' is serial primary key and is converted as such. | |
# * all tables are converted to default type. modify configuration as such. | |
# | |
# if you use more data types than this, add them here. | |
# greedy matching. make sure the order in the array is right to get the | |
# right data conversions. just look at the ordering of the character match. | |
$conversion_table = [ | |
[ /integer|serial/, 'int(11)' ], | |
[ /character\(([0-9]+)\)/, 'char($1)' ], | |
[ /character varying\(([0-9]+)\)/, 'varchar($1)' ], | |
[ /character varying/, 'tinytext' ], | |
[ /timestamp with(out)? time zone/, 'timestamp null default null' ], | |
[ /time with(out)? time zone/, 'time' ], | |
[ /text/, 'text' ], | |
[ /date/, 'date' ], | |
[ /double precision/, 'double precision' ], | |
[ /real/, 'double precision' ], | |
[ /boolean/, 'tinyint(1)' ], | |
[ /numeric\(([0-9,]+)\)/, 'decimal($1)' ] | |
] | |
## | |
# Returns a string for the converted MySQL type from the given Postgres | |
# data type. This will preserve the DEFAULT values also, but it does remove | |
# all data casting. | |
# | |
def convert_to_mysql_type(type) | |
row = $conversion_table.detect { |r| r.first.match(type) } | |
if row.nil? | |
$stderr.puts "COULD NOT FIND MYSQL TYPE FOR '#{type}'" | |
exit 1 | |
end | |
res = type.gsub(row.first, row.last) | |
res.gsub!('$1', $1) if $1 | |
if res =~ /varchar\(([0-9]+)\)/ and $1.to_i > 100 | |
res = 'mediumtext' # most hackish part of this program... | |
end | |
res | |
end | |
# flags and constants used in the loop | |
defining_table = false # knows to collect column names instead of other SQL | |
in_insert = false # flag for determining if it is a multi-line insert | |
table_name = nil # current name of the table | |
table_fields = {} # hash of column names grouped by table_name | |
$<.each do |line| # reads from stdin or all files given on the command line | |
out_line = '' # the line to print out | |
if defining_table | |
next if line =~ /^\)$/ # skip for inherits | |
if line =~ /^\);$/ | |
defining_table = false # finished defining table | |
# join columns together... so you can skip the last comma. | |
out_line = "#{table_fields[table_name].join(",\n")}\n);" | |
elsif line =~ /^INHERITS \(([\w, ]+)\);/ | |
defining_table = false | |
# add in inherited parent columns | |
$1.split(/\, ?/).each { |p| table_fields[p].each { |f| | |
next if table_fields[table_name].include?(f) | |
table_fields[table_name] << f } } | |
out_line = "#{table_fields[table_name].join(",\n")}\n);" # join cols | |
else | |
line.gsub!(/NULL::.+$/, 'NULL,') # remove postgres casting | |
field = line.split.first # get the first word (column name) | |
type = line.gsub(field, '').strip # remove the column name | |
type.chop! if type =~ /\,$/ # remove the comma from the end | |
mysql_type = convert_to_mysql_type(type) # convert to mysql type | |
# for session_id, we need a limit | |
mysql_type = 'varchar(255)' if field == 'session_id' | |
fname = " #{field} #{mysql_type}" # get output line for data type | |
# assume that the 'id' field should be built just like this | |
fname = ' id serial primary key' if field == 'id' | |
table_fields[table_name] << fname # add the line to the data column array | |
end | |
elsif line =~ /^CREATE TABLE \"?(\w+)\"?/ | |
defining_table = true # flag to start defining a table | |
table_name = $1 # set the table name from the regex | |
table_fields[table_name] ||= [] # basic array of empty columns | |
out_line = line | |
elsif line =~ /^CREATE INDEX (\w+) ON (\w+) USING (\w+) \((\w+)\);$/ | |
# create an index on a column, have to flip the words around | |
out_line = "CREATE INDEX #{$1} USING #{$3} ON #{$2} (#{$4});" | |
elsif in_insert or line =~ /^INSERT INTO/ | |
in_insert = true # flag used for multi-line insert statements | |
out_line = line # add a row into a table | |
in_insert = false if line =~ /\);$/ | |
end | |
# put out the line unless there is no line to process. also convert | |
# postgres (") into mysql (`) | |
# also, postgres has strings like this: E'some string' | |
puts out_line.gsub('"', '`').gsub(/ [A-Z]\'/, " '") unless out_line.empty? | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment