AkselA · June 11, 2025 11:52
diff --git a/mutation_simulation.py b/mutation_simulation.py
 # # # Aksel A. Henriksen 10.07.2025
 # # # With good suggestions by the folks at Code Review:
 # # # https://codereview.stackexchange.com/questions/297264/simple-mutation-simulation-for-use-in-science-class

 import random
 import os
 import csv


 def diss(seq_1, seq_2):
     """
     Counts the positions at which two sequences hold different elements.
     Also called Hamming distance.

     Keyword arguments:
     seq_1 -- A list or tuple (any iterable is accepted)
     seq_2 -- A list or tuple (any iterable is accepted)

     If the input sequences aren't of equal length, then the latter
     part of the longer sequence is ignored.

     Return:
     Single integer value
     """

     # zip() pairs the elements position by position and stops at the end
     # of the shorter input, so no length bookkeeping or indexing is needed.
     return sum(1 for left, right in zip(seq_1, seq_2) if left != right)


 def mutate(dna_seq):
     """
     Applies a single random point substitution to a DNA sequence.

     One position is drawn at random and overwritten with a nucleotide
     that is also drawn at random. The drawn nucleotide may be the one
     that was already there, in which case the sequence is unchanged.

     Keyword arguments:
     dna_seq -- A list containing single upper case characters [A, T, G, C].
                The list is modified in place.

     Return:
     The same list object that was passed in, same length as before
     """

     nucleotides = ['A', 'T', 'G', 'C']

     # Draw the position first, then the replacement nucleotide
     position = random.randrange(len(dna_seq))
     dna_seq[position] = random.choice(nucleotides)

     return dna_seq


 def sim_once(dna_seq, generations):
     """
     Mutates a copy of a DNA sequence ('dna_seq') 'generations' times.
     After each mutation the algorithm calculates the proportional
     dissimilarity between the mutated copy and the original sequence.

     Keyword arguments:
     dna_seq -- A list containing single upper case characters [A, T, G, C].
                It is left unmodified.
     generations -- An integer specifying the number of generations

     Return:
     A list of floating point values. The list is 'generations' long.
     """

     # Mutate a private copy. mutate() changes the list it is given, so
     # working on 'dna_seq' directly would alter the caller's list and make
     # a later simulation start from an already mutated sequence.
     mutated_seq = list(dna_seq)
     seq_length = len(dna_seq)
     diss_val = []

     # Mutate the copy 'generations' times and record the dissimilarity
     # to the untouched original after each mutation.
     for _ in range(generations):
         mutated_seq = mutate(mutated_seq)
         # Proportional dissimilarity: differing positions / sequence length
         diss_val.append(diss(mutated_seq, dna_seq) / seq_length)

     return diss_val


 def sim_repeat(dna_seq, generations, reps):
     """
     Repeat the sim_once function 'reps' times, every time starting from
     the same DNA sequence.

     Keyword arguments:
     dna_seq -- list containing single upper case characters [A, T, G, C].
                It is left unmodified.
     generations -- integer specifying the number of generations
     reps -- integer specifying the number of times to repeat the simulation

     Return:
     A 2D rectangular list of floating point values.
     Each of the 'reps' sub-lists is the result of an individual run of the
     function sim_once, and is therefore 'generations' long.
     """

     # Every run receives its own copy of the starting sequence, so that no
     # run can alter the sequence seen by the next run or by the caller,
     # whether or not sim_once modifies the list it is given.
     return [sim_once(list(dna_seq), generations) for _ in range(reps)]


 def export_mutate(mutate_mat, filename="mutation_simulation.csv"):
     """
     Exports mutation simulation results to a CSV file that can be read
     by other programs, like Google Sheets.

     The file holds one row per generation. The first column, "generation",
     counts the generations from 1. The remaining columns, "sim_1",
     "sim_2", ..., hold one simulation each.

     Keyword arguments:
     mutate_mat -- A 2D rectangular list with one sub-list per simulation.
                   It is left unmodified. If it holds no simulations or no
                   generations, only the header row is written.
     filename -- Text string with a .csv ending

     Return:
     Saves the CSV file under 'filename' (a relative name is resolved
     against the current working directory) and returns a text string
     with the current working directory joined to 'filename'.
     """

     # One header label per simulation, after the generation counter
     header = ["generation"]
     header.extend("sim_" + str(i) for i in range(1, len(mutate_mat) + 1))

     # zip(*...) transposes the matrix (swaps rows and columns) so that each
     # row holds one generation. The rows are built as new tuples, so the
     # caller's 'mutate_mat' is never changed.
     rows = [
         (generation, *values)
         for generation, values in enumerate(zip(*mutate_mat), start=1)
     ]

     with open(filename, 'w', newline='') as csvfile:
         csvwriter = csv.writer(csvfile, delimiter=',')
         csvwriter.writerow(header)
         csvwriter.writerows(rows)

     return os.path.join(os.getcwd(), filename)
diff --git a/run_sim.py b/run_sim.py
 # # # Simulation start

 # Starting sequence and size of the experiment
 dna_seq = list("ATGC") * 4  # Original DNA sequence (16 nucleotides)
 num_sims = 5                # Number of simulations
 num_gens = 20               # Number of generations in each simulation

 # A fixed seed makes every run of the script produce the same numbers
 random.seed(1)

 # One list of proportional dissimilarities per simulation
 mutate_mat = sim_repeat(dna_seq, generations=num_gens, reps=num_sims)

 # Write the results to the default CSV file
 export_mutate(mutate_mat)
	# # # Aksel A. Henriksen 10.07.2025
	# # # With good suggestions by the folks at Code Review:
	# # # https://codereview.stackexchange.com/questions/297264/simple-mutation-simulation-for-use-in-science-class

	import random
	import os
	import csv


	def diss(seq_1, seq_2):
	    """
	    Counts number of dissimilarities between two sequences.
	    Also called Hamming distance.

	    Keyword arguments:
	    seq_1 -- A list or tuple (any iterable is accepted)
	    seq_2 -- A list or tuple (any iterable is accepted)

	    If the input sequences aren't of equal length, then the latter
	    part of the longer sequence is ignored.

	    Return:
	    Single integer value
	    """

	    # zip() walks both sequences in lockstep and stops when the shorter
	    # one runs out, which is what ignores the tail of the longer one.
	    return sum(1 for left, right in zip(seq_1, seq_2) if left != right)


	def mutate(dna_seq):
	    """
	    Mutates a DNA sequence, substituting a nucleotide at a random location
	    with a random nucleotide. The random nucleotide may equal the one
	    it replaces, in which case the sequence is unchanged.

	    Keyword arguments:
	    dna_seq -- A list containing single upper case characters [A, T, G, C].
	               The list is modified in place.

	    Return:
	    The same list object that was passed in, same length as before
	    """

	    # Possible nucleotides
	    pos_nuc = ['A', 'T', 'G', 'C']

	    # Random location for mutation
	    mut_loc = random.randint(0, len(dna_seq) - 1)

	    # Select one nucleotide at random and substitute it into the dna sequence
	    dna_seq[mut_loc] = random.choice(pos_nuc)

	    return dna_seq


	def sim_once(dna_seq, generations):
	    """
	    Mutates a copy of a DNA sequence ('dna_seq') 'generations' times.
	    For each mutation the algorithm calculates the proportional
	    dissimilarity between the mutated copy and the original sequence.

	    Keyword arguments:
	    dna_seq -- A list containing single upper case characters [A, T, G, C].
	               It is left unmodified.
	    generations -- An integer specifying the number of generations

	    Return:
	    A list of floating point values. The list is 'generations' long.
	    """

	    # Work on a private copy. mutate() changes the list it is given, so
	    # mutating 'dna_seq' itself would alter the caller's list and make a
	    # later simulation start from an already mutated sequence.
	    working_seq = list(dna_seq)
	    seq_length = len(dna_seq)
	    diss_val = []

	    # Mutate the copy 'generations' times and calculate the dissimilarity
	    # to the untouched original after each mutation.
	    for _ in range(generations):
	        # Mutate the DNA sequence
	        working_seq = mutate(working_seq)
	        # Calculate proportional dissimilarity
	        diss_val.append(diss(working_seq, dna_seq) / seq_length)

	    return diss_val


	def sim_repeat(dna_seq, generations, reps):
	    """
	    Repeat the sim_once function 'reps' times, each time starting from
	    the same DNA sequence.

	    Keyword arguments:
	    dna_seq -- list containing single upper case characters [A, T, G, C].
	               It is left unmodified.
	    generations -- integer specifying the number of generations
	    reps -- integer specifying the number of times to repeat the simulation

	    Return:
	    A 2D rectangular list of floating point values.
	    Each of the 'reps' sub-lists is the result of an individual run of the
	    function sim_once, and is therefore 'generations' long.
	    """

	    mutate_mat = []
	    for _ in range(reps):
	        # Give each run its own copy of the starting sequence, so that no
	        # run can alter the sequence seen by the next run or by the caller,
	        # whether or not sim_once modifies the list it is given.
	        mutate_mat.append(sim_once(list(dna_seq), generations))
	    return mutate_mat


	def export_mutate(mutate_mat, filename="mutation_simulation.csv"):
	    """
	    Helps with exporting mutation simulation results to a CSV file
	    that can be read by other programs, like Google Sheets.

	    The file holds one row per generation. The first column, "generation",
	    counts the generations from 1. The remaining columns, "sim_1",
	    "sim_2", ..., hold one simulation each.

	    Keyword arguments:
	    mutate_mat -- A 2D rectangular list with one sub-list per simulation.
	                  It is left unmodified. If it holds no simulations or no
	                  generations, only the header row is written.
	    filename -- Text string with a .csv ending

	    Return:
	    Saves the CSV file under 'filename' (a relative name is resolved
	    against the current working directory) and returns a text string
	    with the current working directory joined to 'filename'.
	    """

	    # Add headers: the generation counter plus one label per simulation
	    header = ["generation"]
	    header.extend("sim_" + str(i) for i in range(1, len(mutate_mat) + 1))

	    # Transpose the matrix (swap rows and columns) and put the generation
	    # count in front of each row. The rows are new tuples, so the
	    # caller's 'mutate_mat' is never changed.
	    rows = [
	        (generation, *values)
	        for generation, values in enumerate(zip(*mutate_mat), start=1)
	    ]

	    # Export simulation data as a CSV file that can be imported to
	    # Google Sheets
	    with open(filename, 'w', newline='') as csvfile:
	        csvwriter = csv.writer(csvfile, delimiter=',')
	        csvwriter.writerow(header)
	        csvwriter.writerows(rows)

	    return os.path.join(os.getcwd(), filename)
	# # # Simulation start

	# Original DNA sequence: the motif ATGC repeated four times
	dna_seq = list("ATGC") * 4
	# Number of simulations
	num_sims = 5
	# Number of generations in each simulation
	num_gens = 20

	# Seed the random number generator so the simulation is repeatable
	random.seed(1)

	# Run all simulations; the result holds one list per simulation
	mutate_mat = sim_repeat(dna_seq, generations=num_gens, reps=num_sims)

	# Export simulation results to the default CSV file
	export_mutate(mutate_mat)