penguincoder · April 2, 2009 04:34
diff --git a/pg2mysql.rb b/pg2mysql.rb
 #!/usr/bin/env ruby

 ##
 # Andrew Coleman <mercury at penguincoder dot org>
 # Released under the GPLv2 http://gnu.org
 # General purpose tranlator for pgsql -> mysql. Best used with Rails-like data.
 #
 # Use: pg_dump -DOx [dbname] > outputfile
 # to get a proper dump for this program.
 #
 # Allows for CREATE TABLE, INSERT INTO, and CREATE INDEX translations.
 # Keeps DEFAULT values and strips all Postgres style data casting (::).
 # Also can interpret multi-line INSERT statements (for those that use newlines
 # in text fields). The script will also include any columns from INHERIT tables
 # at the end of the CREATE TABLE clause.
 #
 # General Usage:
 #  ./pg2mysql.rb [input files] > mysql-compatible-dump
 # also:
 #  pg_dump -DOx [dbname] | ./pg2mysql.rb > mysql-compatible-dump
 #
 # Assumptions:
 #  * field 'id' is serial primary key and is converted as such.
 #  * all tables are converted to default type. modify configuration as such.
 #

 # if you use more data types than this, add them here.
 # greedy matching. make sure the order in the array is right to get the
 # right data conversions. just look at the ordering of the character match.
 $conversion_table = [
  [ /integer|serial/, 'int(11)' ],
  [ /character\(([0-9]+)\)/, 'char($1)' ],
  [ /character varying\(([0-9]+)\)/, 'varchar($1)' ],
  [ /character varying/, 'tinytext' ],
  [ /timestamp with(out)? time zone/, 'timestamp null default null' ],
  [ /time with(out)? time zone/, 'time' ],
  [ /text/, 'text' ],
  [ /date/, 'date' ],
  [ /double precision/, 'double precision' ],
  [ /real/, 'double precision' ],
  [ /boolean/, 'tinyint(1)' ],
  [ /numeric\(([0-9,]+)\)/, 'decimal($1)' ]
 ]

 ##
 # Returns a string for the converted MySQL type from the given Postgres
 # data type. This will preserve the DEFAULT values also, but it does remove
 # all data casting.
 #
 def convert_to_mysql_type(type)
  row = $conversion_table.detect { |r| r.first.match(type) }
  if row.nil?
    $stderr.puts "COULD NOT FIND MYSQL TYPE FOR '#{type}'"
    exit 1
  end
  res = type.gsub(row.first, row.last)
  res.gsub!('$1', $1) if $1
  if res =~ /varchar\(([0-9]+)\)/ and $1.to_i > 100
    res = 'mediumtext' # most hackish part of this program...
  end
  res
 end

 # flags and constants used in the loop
 defining_table = false # knows to collect column names instead of other SQL
 in_insert = false # flag for determining if it is a multi-line insert
 table_name = nil # current name of the table
 table_fields = {} # hash of column names grouped by table_name

 $<.each do |line| # reads from stdin or all files given on the command line
  out_line = '' # the line to print out
  if defining_table
    next if line =~ /^\)$/ # skip for inherits
    if line =~ /^\);$/
      defining_table = false # finished defining table
      # join columns together... so you can skip the last comma.
      out_line = "#{table_fields[table_name].join(",\n")}\n);"
    elsif line =~ /^INHERITS \(([\w, ]+)\);/
      defining_table = false
      # add in inherited parent columns
      $1.split(/\, ?/).each { |p| table_fields[p].each { |f|
        next if table_fields[table_name].include?(f)
        table_fields[table_name] << f } }
      out_line = "#{table_fields[table_name].join(",\n")}\n);" # join cols
    else
      line.gsub!(/NULL::.+$/, 'NULL,') # remove postgres casting
      field = line.split.first # get the first word (column name)
      type = line.gsub(field, '').strip # remove the column name
      type.chop! if type =~ /\,$/ # remove the comma from the end
      mysql_type = convert_to_mysql_type(type) # convert to mysql type
      # for session_id, we need a limit
      mysql_type = 'varchar(255)' if field == 'session_id'
      fname = "  #{field} #{mysql_type}" # get output line for data type
      # assume that the 'id' field should be built just like this
      fname = '  id serial primary key' if field == 'id'
      table_fields[table_name] << fname # add the line to the data column array
    end
  elsif line =~ /^CREATE TABLE \"?(\w+)\"?/
    defining_table = true # flag to start defining a table
    table_name = $1 # set the table name from the regex
    table_fields[table_name] ||= [] # basic array of empty columns
    out_line = line
  elsif line =~ /^CREATE INDEX (\w+) ON (\w+) USING (\w+) \((\w+)\);$/
    # create an index on a column, have to flip the words around
    out_line = "CREATE INDEX #{$1} USING #{$3} ON #{$2} (#{$4});"
  elsif in_insert or line =~ /^INSERT INTO/
    in_insert = true # flag used for multi-line insert statements
    out_line = line # add a row into a table
    in_insert = false if line =~ /\);$/
  end
  # put out the line unless there is no line to process. also convert
  # postgres (") into mysql (`)
  # also, postgres has strings like this: E'some string'
  puts out_line.gsub('"', '`').gsub(/ [A-Z]\'/, " '") unless out_line.empty?
 end
	#!/usr/bin/env ruby

	##
	# Andrew Coleman <mercury at penguincoder dot org>
	# Released under the GPLv2 http://gnu.org
	# General purpose tranlator for pgsql -> mysql. Best used with Rails-like data.
	#
	# Use: pg_dump -DOx [dbname] > outputfile
	# to get a proper dump for this program.
	#
	# Allows for CREATE TABLE, INSERT INTO, and CREATE INDEX translations.
	# Keeps DEFAULT values and strips all Postgres style data casting (::).
	# Also can interpret multi-line INSERT statements (for those that use newlines
	# in text fields). The script will also include any columns from INHERIT tables
	# at the end of the CREATE TABLE clause.
	#
	# General Usage:
	# ./pg2mysql.rb [input files] > mysql-compatible-dump
	# also:
	# pg_dump -DOx [dbname] \| ./pg2mysql.rb > mysql-compatible-dump
	#
	# Assumptions:
	# * field 'id' is serial primary key and is converted as such.
	# * all tables are converted to default type. modify configuration as such.
	#

	# if you use more data types than this, add them here.
	# greedy matching. make sure the order in the array is right to get the
	# right data conversions. just look at the ordering of the character match.
	$conversion_table = [
	[ /integer\|serial/, 'int(11)' ],
	[ /character\(([0-9]+)\)/, 'char($1)' ],
	[ /character varying\(([0-9]+)\)/, 'varchar($1)' ],
	[ /character varying/, 'tinytext' ],
	[ /timestamp with(out)? time zone/, 'timestamp null default null' ],
	[ /time with(out)? time zone/, 'time' ],
	[ /text/, 'text' ],
	[ /date/, 'date' ],
	[ /double precision/, 'double precision' ],
	[ /real/, 'double precision' ],
	[ /boolean/, 'tinyint(1)' ],
	[ /numeric\(([0-9,]+)\)/, 'decimal($1)' ]
	]

	##
	# Returns a string for the converted MySQL type from the given Postgres
	# data type. This will preserve the DEFAULT values also, but it does remove
	# all data casting.
	#
	def convert_to_mysql_type(type)
	row = $conversion_table.detect { \|r\| r.first.match(type) }
	if row.nil?
	$stderr.puts "COULD NOT FIND MYSQL TYPE FOR '#{type}'"
	exit 1
	end
	res = type.gsub(row.first, row.last)
	res.gsub!('$1', $1) if $1
	if res =~ /varchar\(([0-9]+)\)/ and $1.to_i > 100
	res = 'mediumtext' # most hackish part of this program...
	end
	res
	end

	# flags and constants used in the loop
	defining_table = false # knows to collect column names instead of other SQL
	in_insert = false # flag for determining if it is a multi-line insert
	table_name = nil # current name of the table
	table_fields = {} # hash of column names grouped by table_name

	$<.each do \|line\| # reads from stdin or all files given on the command line
	out_line = '' # the line to print out
	if defining_table
	next if line =~ /^\)$/ # skip for inherits
	if line =~ /^\);$/
	defining_table = false # finished defining table
	# join columns together... so you can skip the last comma.
	out_line = "#{table_fields[table_name].join(",\n")}\n);"
	elsif line =~ /^INHERITS \(([\w, ]+)\);/
	defining_table = false
	# add in inherited parent columns
	$1.split(/\, ?/).each { \|p\| table_fields[p].each { \|f\|
	next if table_fields[table_name].include?(f)
	table_fields[table_name] << f } }
	out_line = "#{table_fields[table_name].join(",\n")}\n);" # join cols
	else
	line.gsub!(/NULL::.+$/, 'NULL,') # remove postgres casting
	field = line.split.first # get the first word (column name)
	type = line.gsub(field, '').strip # remove the column name
	type.chop! if type =~ /\,$/ # remove the comma from the end
	mysql_type = convert_to_mysql_type(type) # convert to mysql type
	# for session_id, we need a limit
	mysql_type = 'varchar(255)' if field == 'session_id'
	fname = " #{field} #{mysql_type}" # get output line for data type
	# assume that the 'id' field should be built just like this
	fname = ' id serial primary key' if field == 'id'
	table_fields[table_name] << fname # add the line to the data column array
	end
	elsif line =~ /^CREATE TABLE \"?(\w+)\"?/
	defining_table = true # flag to start defining a table
	table_name = $1 # set the table name from the regex
	table_fields[table_name] \|\|= [] # basic array of empty columns
	out_line = line
	elsif line =~ /^CREATE INDEX (\w+) ON (\w+) USING (\w+) \((\w+)\);$/
	# create an index on a column, have to flip the words around
	out_line = "CREATE INDEX #{$1} USING #{$3} ON #{$2} (#{$4});"
	elsif in_insert or line =~ /^INSERT INTO/
	in_insert = true # flag used for multi-line insert statements
	out_line = line # add a row into a table
	in_insert = false if line =~ /\);$/
	end
	# put out the line unless there is no line to process. also convert
	# postgres (") into mysql (`)
	# also, postgres has strings like this: E'some string'
	puts out_line.gsub('"', '`').gsub(/ [A-Z]\'/, " '") unless out_line.empty?
	end