Last active
September 1, 2019 11:13
-
-
Save vestalisvirginis/131201d629fedd5fc20c969bb2d1b210 to your computer and use it in GitHub Desktop.
dna manipulation functions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import product | |
from functional import seq | |
def all_kmers(k):
    """Return every DNA string of length ``k`` over the alphabet ``'ACGT'``.

    The result is the Cartesian product of ``'ACGT'`` with itself ``k``
    times, joined into strings, in the order ``itertools.product`` yields
    them (``'AA'``, ``'AC'``, ``'AG'``, ...).

    Args:
        k: Length of the generated strings. ``k == 0`` yields ``['']``;
            a negative ``k`` raises ``ValueError`` (from
            ``itertools.product``).

    Returns:
        A list of ``4 ** k`` strings.
    """
    # Join each tuple of letters directly; no intermediate list is needed.
    return [''.join(letters) for letters in product('ACGT', repeat=k)]
def kmer_per_segment(dna_segment, k):
    """Return all substrings of length ``k`` of a single DNA string.

    The substrings are returned in order of their start position and may
    contain duplicates.

    Args:
        dna_segment: The DNA string to slice.
        k: Length of each substring; must not be negative.

    Returns:
        A list of ``len(dna_segment) - k + 1`` substrings, or an empty list
        when ``dna_segment`` is shorter than ``k``.

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        # A negative length would silently produce meaningless slices.
        raise ValueError('k must not be negative')
    window_count = len(dna_segment) - k + 1
    # range() of a non-positive count is empty, so short inputs yield [].
    return [dna_segment[start:start + k] for start in range(window_count)]
def median_distance(dna, k, list_kmers):
    """Find the k-mer with the lowest total distance to a set of DNA strings.

    Every candidate returned by ``all_kmers(k)`` is scored as the sum, over
    the strings in ``dna``, of the smallest ``lv.distance`` between the
    candidate and any length-``k`` substring of that string. The first
    candidate that reaches the lowest score is kept.

    Args:
        dna: Iterable of DNA strings.
        k: Length of the candidate k-mers.
        list_kmers: Unused; kept so existing callers keep working.

    Returns:
        A ``(distance, median)`` tuple: the lowest total distance and the
        k-mer that achieves it. When no candidate has a finite total (for
        example a string in ``dna`` is shorter than ``k``), the result is
        ``(float('inf'), None)``.
    """
    # NOTE(review): `lv` is not imported in the visible part of this file;
    # presumably `import Levenshtein as lv` is intended -- confirm.

    # The substrings of each DNA string do not depend on the candidate, so
    # compute them once instead of once per candidate.
    substrings_per_string = [kmer_per_segment(segment, k) for segment in dna]

    best_distance = float('inf')
    best_kmer = None
    for kmer in all_kmers(k):
        # A string with no substring of length k contributes infinity,
        # which rules the candidate out.
        total = sum(
            min((lv.distance(mer, kmer) for mer in mers),
                default=float('inf'))
            for mers in substrings_per_string
        )
        # Strict comparison keeps the first candidate on ties.
        if total < best_distance:
            best_distance, best_kmer = total, kmer
    return best_distance, best_kmer
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment