Created
April 10, 2014 17:24
-
-
Save tomriley/10404172 to your computer and use it in GitHub Desktop.
Migrate a Mongoid / MongoDB database to an ActiveRecord based SQL one. One method to convert the schema, another to migrate data. Copes with most basic data types. Some hacks to infer TEXT columns. Converts embeds_one relationships to AR aggregations. I wrote this for a one-time migration. It could be a good starting point for someone doing the …
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Migrate schema and data from Mongoid to MySQL ActiveRecord | |
class MongoidMysqlMigrator | |
def randomize_auto_increment_values(source_models, from=5500, to=10500) | |
source_models.each do |model| | |
value = rand(from-to)+from | |
sql = %(ALTER TABLE #{model.name.tableize} AUTO_INCREMENT=#{value}) | |
puts sql | |
ActiveRecord::Base.connection.execute(sql) | |
end | |
end | |
# @param sources (Array) list of Mongoid document types | |
def migrate_data(sources) | |
sources.each do |source| | |
size = source.count | |
migrate_source_collection(source, source.all) if size > 0 | |
end | |
sources.each do |source_model| | |
puts "Resolving foreign keys for #{source_model}" | |
target_class = define_active_record_model_class(source_model) | |
each_belongs_to(source_model) do |relation| | |
field_name = relation.foreign_key | |
if relation.polymorphic? | |
# Get the list of distinct class names from the polymophic association _type | |
# fields and perform an update for each | |
class_names = target_class.group("#{relation.name}_type").map(&"#{relation.name}_type".to_sym) | |
class_names.map(&:tableize).each do |rel_target_table| | |
sql = %(UPDATE #{target_class.table_name} t1 | |
INNER JOIN #{rel_target_table} t2 ON | |
t2.mongo_id = t1.#{field_name}_mongo_id | |
SET t1.#{field_name} = t2.id) | |
#puts sql | |
ActiveRecord::Base.connection.execute(sql) | |
end | |
else | |
rel_target_table = relation.class_name.tableize | |
sql = %(UPDATE #{target_class.table_name} t1 | |
INNER JOIN #{rel_target_table} t2 ON | |
t2.mongo_id = t1.#{field_name}_mongo_id | |
SET t1.#{field_name} = t2.id) | |
#puts sql | |
ActiveRecord::Base.connection.execute(sql) | |
end | |
end | |
# Wire up embedded docs to parent | |
each_embedded_relation(source_model) do |relation| | |
embedded_class_name = relation.class_name | |
embedded_class = embedded_class_name.constantize | |
embedded_mysql_class = define_active_record_model_class(embedded_class) | |
embedded_ins = embedded_in_relations(embedded_class) | |
raise "too many embedded_in relations" unless embedded_ins.size == 1 | |
# There's no foreign key on the embedded doc so we infer it from | |
# the relation name. For polymorphic embeds, this might be something | |
# like parent_id or addressable_id | |
rel_name = embedded_ins.values.first.name | |
field_name = "#{rel_name}_id" | |
rel_target_table = source_model.name.tableize | |
sql = %(UPDATE #{embedded_mysql_class.table_name} SET #{field_name}=( | |
SELECT id FROM #{rel_target_table} WHERE | |
#{rel_target_table}.mongo_id = #{embedded_mysql_class.table_name}.#{field_name}_mongo_id | |
)) | |
ActiveRecord::Base.connection.execute(sql) | |
#puts sql | |
end | |
end | |
end | |
def truncate_targets(sources) | |
sources.each do |source| | |
target_class = define_active_record_model_class(source) | |
target_class.destroy_all | |
end | |
end | |
# @param sources (Array) list of Mongoid document types | |
def migrate_schema(mongoid_models) | |
# Build MySQL schema | |
mongoid_models.each do |mongoid_model| | |
table = mongoid_model.name.tableize | |
# Drop and create SQL table | |
migrator.create_table table, :force => true | |
# Add mongo id column | |
migrator.add_column table, :mongo_id, :string | |
migrator.add_index table, :mongo_id | |
# Add data fields | |
each_basic_field(mongoid_model) do |field| | |
migrate_field_to_table(table, field) | |
end | |
# Add foreign key fields | |
mongoid_model.relations.each do |rel_name, relation| | |
if relation.relation == Mongoid::Relations::Referenced::In || | |
relation.relation == Mongoid::Relations::Embedded::In | |
field = relation.foreign_key unless Mongoid::Relations::Embedded::In | |
field ||= "#{relation.name}_id" | |
migrator.add_column table, field, :integer | |
migrator.add_index table, field, :name => field | |
if relation.relation == Mongoid::Relations::Embedded::In && relation.polymorphic? | |
type_field = "#{relation.name}_type" | |
migrator.add_column table, type_field, :string | |
migrator.add_index table, type_field, :name => type_field | |
end | |
migrator.add_column table, field+"_mongo_id", :string, :length => 30 | |
migrator.add_index table, field+"_mongo_id", :name => field+"_mongo_id" | |
end | |
if relation.relation == Mongoid::Relations::Embedded::One | |
# See if we need to convert this embeds_one into a composed_of | |
klass = relation.class_name.constantize | |
#if composable_types.include? [relation.inverse_class_name.constantize, klass] | |
prefix = relation.name | |
# Migrate all the embedded classes fields into the target | |
# table but with prefixed field names | |
each_basic_field(klass) do |field| | |
migrate_field_to_table(table, field, "#{prefix}_#{field.name}") | |
end | |
#end | |
end | |
end | |
# Carrier wave uploaders | |
each_uploader(mongoid_model) do |field, uploader_type| | |
migrator.add_column table, field, :string | |
end | |
end | |
end | |
private | |
def migrate_field_to_table(table, mongoid_field, field_name = nil) | |
indexed_fields = mongoid_field.options[:klass].index_options.keys.map(&:keys).map(&:first) | |
toSqlType = { | |
String => :string, | |
Time => :datetime, | |
Integer => :integer, | |
Boolean => :boolean, | |
::Money => [:decimal, :precision => 8, :scale => 2, :default => 0.00], | |
BigDecimal => [:decimal, :precision => 8, :scale => 3, :default => 0.000], | |
Date => :date, | |
Hash => :text, | |
Array => :text | |
} | |
field_name ||= mongoid_field.name | |
type = mongoid_field.type | |
sql_type = toSqlType[type] | |
default = mongoid_field.options[:default] | |
default = nil if default.is_a?(Proc) || sql_type == :text | |
null = default.nil? | |
null = true if sql_type == :text | |
# Switch some String colums up to text | |
if sql_type == :string && | |
(field_name.include?('notes') || | |
field_name == 'value' || | |
field_name == 'content' || | |
field_name.include?('_footer') || | |
field_name.include?('_extra_text')) | |
sql_type = :text | |
default = nil | |
null = true | |
end | |
column_options = { | |
:default => default, | |
:null => null | |
} | |
if sql_type.is_a?(Array) | |
column_options.merge!(sql_type[1]) | |
sql_type = sql_type.first | |
end | |
if sql_type | |
migrator.add_column table, field_name, sql_type, column_options | |
if indexed_fields.include? field_name.to_sym | |
migrator.add_index table, field_name, :name => field_name | |
end | |
else | |
puts colorize("Skipped field #{table}##{field_name} with unknown type #{type}", COLORS[:red]) | |
end | |
end | |
def embedded_in_relations(model) | |
model.relations.select { |k,v| v.relation == Mongoid::Relations::Embedded::In } | |
end | |
# @param source_model (Class) the Mongoid model class | |
# @param collection (Array) the model instances to migrate | |
def migrate_source_collection(source_model, collection) | |
target_class = define_active_record_model_class(source_model) | |
#target_class.destroy_all | |
collection.each do |source| | |
target = migrate_source_document(source, target_class) | |
# Now step through embedded singletons and collections, converting those... | |
each_embedded_relation(source_model) do |relation| | |
puts "Migrating embedded #{source_model} #{source.id} #{relation.name}" | |
embedded_class = relation.class_name.constantize | |
if relation.relation == Mongoid::Relations::Embedded::One# && | |
# composable_types.include? [source_model, embedded_class] | |
puts "Migrating to aggregation #{target.class} #{embedded_class}" | |
# it's to be converted to a composed_of | |
prefix = relation.name | |
embedded_doc = source.send(relation.name) | |
if embedded_doc | |
each_basic_field(embedded_class) do |field| | |
copy_field_value(embedded_doc, target, field, "#{prefix}_#{field.name}") | |
end | |
end | |
target.save! | |
else | |
value = source.send(relation.name) | |
value = [value] if relation.relation == Mongoid::Relations::Embedded::One | |
value = value.compact # Embedded document might not exist | |
migrate_source_collection(embedded_class, value) do |target| | |
# No foreign key back to the parent document exists for | |
# embedded documents so we need to set it manually here | |
foreign_key_name = source_model.name.tableize.singularize+'_id' | |
target.send("#{foreign_key_name}_mongo_id=", source.id.to_s) | |
end | |
end | |
end | |
end | |
end | |
def copy_field_value(source, target, mongoid_field, target_field_name = nil) | |
target_field_name ||= mongoid_field.name | |
type = mongoid_field.type | |
value = source.send(mongoid_field.name) | |
value = value.to_s if type == ::Money | |
target.send("#{target_field_name}=", value) | |
end | |
def migrate_source_document(source, target_class) | |
source_class = source.class | |
target = target_class.new | |
puts "Migrating #{source.class} #{source.id}..." | |
# Store Mongo document ID | |
target.mongo_id = source.id.to_s | |
# Copy fields | |
each_basic_field(source_class) do |field| | |
copy_field_value(source, target, field) | |
end | |
# For each relation, we store the Mongo document id in | |
# a temporary <key field>_mongo_id string column | |
each_belongs_to(source_class) do |relation| | |
field_name = relation.foreign_key | |
value = source.send(field_name).to_s | |
target.send("#{field_name}_mongo_id=", value) | |
end | |
each_embedded_in(source_class) do |relation| | |
field_name = "#{relation.name}_id" | |
value = source.send(relation.name).id.to_s | |
target.send("#{field_name}_mongo_id=", value) | |
if relation.polymorphic? | |
target.send("#{relation.name}_type=", source.send(relation.name).class.name) | |
end | |
end | |
each_uploader(source_class) do |field, uploader_type| | |
target.send("#{field}=", source.send('[]', field)) | |
end | |
target.save! | |
target | |
end | |
# Yield block for every basic (not key, uploader etc) field | |
def each_basic_field(mongoid_model, &block) | |
mongoid_model.fields.each do |field_name, field| | |
unless field.name[0] == '_' || # _type or _id | |
field.name.ends_with?('_id') && field.type == Object || # foreign key | |
mongoid_model.uploaders.keys.include?(field_name.to_sym) || # uploader | |
field.type == Symbol # something to do with polymorphic relations e.g. commentable_field | |
yield(field) | |
end | |
end | |
end | |
def each_belongs_to(mongoid_model, &block) | |
# Add foreign key fields | |
mongoid_model.relations.each do |rel_name, relation| | |
if relation.relation == Mongoid::Relations::Referenced::In | |
yield(relation) | |
end | |
end | |
end | |
def each_embedded_relation(mongoid_model, &block) | |
# Add foreign key fields | |
mongoid_model.embedded_relations.each do |rel_name, relation| | |
yield(relation) | |
end | |
end | |
def each_embedded_in(mongoid_model, &block) | |
# Add foreign key fields | |
embedded_in_relations(mongoid_model).each do |rel_name, relation| | |
yield(relation) | |
end | |
end | |
def each_uploader(mongoid_model, &block) | |
mongoid_model.uploaders.each do |field, uploader_type| | |
yield(field, uploader_type) | |
end | |
end | |
def define_active_record_model_class(mongoid_model) | |
@_ar_class_cache ||= {} | |
arclass = @_ar_class_cache[mongoid_model] | |
return arclass if arclass | |
target_class_name = "Mysql#{mongoid_model.name}" | |
# Define an ActiveRecord model class | |
target_class = Class.new(ActiveRecord::Base) do | |
set_table_name mongoid_model.name.tableize | |
end | |
Kernel.const_set(target_class_name, target_class) | |
@_ar_class_cache[mongoid_model] = target_class | |
#target_class | |
end | |
def migrator | |
@_migrator ||= ActiveRecord::Migration.new | |
end | |
def colorize(text, color_code) | |
"\033[#{color_code}m#{text}\033[0m" | |
end | |
COLORS = { | |
:black => 30, | |
:red => 31, | |
:green => 32, | |
:yellow => 33, | |
:blue => 34, | |
:magenta => 35, | |
:cyan => 36, | |
:white => 37, | |
:warn => 36, | |
:error => 35 | |
} | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment