Skip to content

Instantly share code, notes, and snippets.

@AkselA
Last active June 11, 2025 11:52
Show Gist options
  • Save AkselA/3d3730110733cb959387e3d31e9ca9fa to your computer and use it in GitHub Desktop.
Save AkselA/3d3730110733cb959387e3d31e9ca9fa to your computer and use it in GitHub Desktop.
Python functions for performing simple DNA mutation simulations
# # # Aksel A. Henriksen 10.07.2025
# # # With good suggestions by the folks at Code Review:
# # # https://codereview.stackexchange.com/questions/297264/simple-mutation-simulation-for-use-in-science-class
import random
import os
import csv
def diss(seq_1, seq_2):
    """
    Count the number of positions at which two sequences differ.

    Also called the Hamming distance.

    Keyword arguments:
    seq_1 -- A list, tuple, string or other iterable
    seq_2 -- A list, tuple, string or other iterable

    If the input sequences aren't of equal length, then the latter
    part of the longer sequence is ignored.

    Return:
    Single integer value
    """
    # zip() stops at the end of the shorter input, which gives the
    # documented truncation without any explicit length bookkeeping.
    return sum(1 for item_1, item_2 in zip(seq_1, seq_2) if item_1 != item_2)
def mutate(dna_seq):
    """
    Mutate a DNA sequence by substituting the nucleotide at one random
    location with a random nucleotide.

    The replacement is drawn from all four nucleotides, so it can be the
    same as the nucleotide already present, leaving the sequence unchanged.

    Keyword arguments:
    dna_seq -- A non-empty list containing single upper case characters
               [A, T, G, C]. The list is modified in place.

    Return:
    The same list object that was passed in, after the substitution.
    Same length as input.

    Raise:
    ValueError if dna_seq is empty (there is no location to mutate).
    """
    seq_length = len(dna_seq)
    if seq_length == 0:
        raise ValueError("dna_seq must contain at least one nucleotide")
    # Possible nucleotides
    pos_nuc = ['A', 'T', 'G', 'C']
    # Random location for mutation, uniformly chosen from 0..seq_length-1
    mut_loc = random.randrange(seq_length)
    # Select one nucleotide at random and substitute it into the dna sequence
    dna_seq[mut_loc] = random.choice(pos_nuc)
    return dna_seq
def sim_once(dna_seq, generations):
    """
    Mutate a DNA sequence ('dna_seq') 'generations' times.

    After each mutation the algorithm calculates the proportional
    dissimilarity between the mutated sequence and the original sequence.
    The sequence passed in by the caller is left unmodified.

    Keyword arguments:
    dna_seq -- A sequence of single upper case characters [A, T, G, C]
    generations -- An integer specifying the number of generations

    Return:
    A list of floating point values. The list is 'generations' long.
    """
    # Store a copy of the original DNA sequence that
    # the mutated sequence can be compared to.
    dna_seq_orig = list(dna_seq)
    # Work on a second, private copy. mutate() changes the list it is
    # given, so mutating 'dna_seq' directly would alter the caller's
    # sequence and make any later simulation start from mutated data.
    dna_seq_mut = list(dna_seq)
    seq_length = len(dna_seq_orig)
    diss_val = []
    # Mutate the sequence 'generations' times and record the
    # dissimilarity after each mutation.
    for _ in range(generations):
        # Mutate the DNA sequence
        dna_seq_mut = mutate(dna_seq_mut)
        # Calculate proportional dissimilarity
        diss_val.append(diss(dna_seq_mut, dna_seq_orig) / seq_length)
    # Return proportional dissimilarities.
    return diss_val
def sim_repeat(dna_seq, generations, reps):
    """
    Repeat the sim_once function 'reps' times.

    Every repetition starts from the same, unmutated 'dna_seq'.

    Keyword arguments:
    dna_seq -- sequence of single upper case characters [A, T, G, C]
    generations -- integer specifying the number of generations
    reps -- integer specifying the number of times to repeat the simulation

    Return:
    A 2D rectangular list of floating point values.
    Each of the 'reps' sub-lists is the result of an individual run of the
    function sim_once, and is therefore 'generations' long.
    """
    # Hand each run its own copy of the starting sequence so that the
    # repetitions are independent of each other and the caller's
    # sequence is never touched.
    return [sim_once(list(dna_seq), generations) for _ in range(reps)]
def export_mutate(mutate_mat, filename="mutation_simulation.csv"):
    """
    Helps with exporting mutation simulation results to a CSV file
    that can be read by other programs, like Google Sheets.

    The file has one column per simulation ("sim_1", "sim_2", ...) preceded
    by a "generation" column counting from 1, and one row per generation.
    The input list is not modified.

    Keyword arguments:
    mutate_mat -- A 2D rectangular list, one sub-list per simulation.
                  May be empty, in which case only the header is written.
    filename -- Text string with a .csv ending

    Return:
    Saves a CSV file (relative filenames are resolved against the current
    working directory) and returns a text string giving the path to the
    saved file.
    """
    # Number of generations is taken from the first simulation
    num_gens = len(mutate_mat[0]) if mutate_mat else 0
    # Headers: a generation counter followed by one column per simulation
    header = ["generation"]
    header.extend("sim_" + str(i) for i in range(1, len(mutate_mat) + 1))
    # Transpose (swap rows and columns) with the generation count as the
    # first column. Building the rows with zip() leaves 'mutate_mat'
    # itself untouched.
    rows = zip(range(1, num_gens + 1), *mutate_mat)
    # Export simulation data as a CSV file that can be imported to
    # Google Sheets
    with open(filename, 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=',')
        csvwriter.writerow(header)
        csvwriter.writerows(rows)
    # Return the path to the CSV file
    return os.path.join(os.getcwd(), filename)
# # # Simulation start
# Module-level script: runs as soon as the file is executed. It builds a
# starting sequence, runs several seeded mutation simulations on it and
# writes the results to a CSV file.
dna_seq = list("ATGC" * 4) # Original DNA sequence: 16 nucleotides, as a list of characters
num_sims = 5 # Number of simulations
num_gens = 20 # Number of generations in each simulation
# Set random seed so the simulation is repeatable.
# This must happen before any mutation is drawn.
random.seed(1)
# Note the argument order: generations first, then number of repetitions.
mutate_mat = sim_repeat(dna_seq, num_gens, num_sims)
# Export simulation results to the default file name,
# "mutation_simulation.csv". The returned file path is not used here.
export_mutate(mutate_mat)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment