Created
December 8, 2017 12:22
-
-
Save akatriel/c505b524dcfa4843ae54b054e1c22ac3 to your computer and use it in GitHub Desktop.
Motif Finding created by akatriel1 - https://repl.it/@akatriel1/Motif-Finding
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def HammingDistance(str1, str2):
    """Return the number of positions at which two sequences differ.

    Args:
        str1: First sequence (typically a DNA string).
        str2: Second sequence; must have the same length as ``str1``.

    Returns:
        The count of indices ``i`` for which ``str1[i] != str2[i]``.

    Raises:
        ValueError: If the sequences differ in length. The Hamming distance
            is only defined for equal-length sequences, so a mismatch is
            reported explicitly instead of surfacing as an IndexError or
            silently ignoring the tail of the longer input.
    """
    if len(str1) != len(str2):
        raise ValueError(
            "HammingDistance requires sequences of equal length "
            "(got %d and %d)" % (len(str1), len(str2))
        )
    # Each comparison yields a bool, which sums as 0 or 1.
    return sum(a != b for a, b in zip(str1, str2))
def neighbors(kmer, distance):
    """Return every DNA string within ``distance`` mismatches of ``kmer``.

    The neighbourhood is built recursively: the neighbours of the suffix
    ``kmer[1:]`` are computed first, then each one is extended with a first
    symbol. If a suffix neighbour already uses the whole mismatch budget,
    only the original first symbol may be prepended; otherwise any of the
    four nucleotides may be.

    Args:
        kmer: The pattern (a string over the alphabet A, C, G, T).
        distance: Maximum number of mismatches allowed (non-negative int).

    Returns:
        A set of strings, each the same length as ``kmer``. An empty
        ``kmer`` yields an empty set.

    Raises:
        ValueError: If ``distance`` is negative.
    """
    if distance < 0:
        # Without this guard the recursion would never hit the
        # ``distance == 0`` base case and would return all 4**k strings.
        raise ValueError("distance must be non-negative")
    if not kmer:
        # Always return a set ({} would be an empty dict).
        return set()
    if distance == 0:
        return {kmer}
    if len(kmer) == 1:
        return set("ACGT")

    first_symbol, suffix = kmer[0], kmer[1:]
    result = set()
    for suffix_neighbor in neighbors(suffix, distance):
        if HammingDistance(suffix_neighbor, suffix) == distance:
            # Mismatch budget exhausted by the suffix: first symbol is fixed.
            result.add(first_symbol + suffix_neighbor)
        else:
            # At least one mismatch left: any nucleotide may lead.
            result.update(nucleotide + suffix_neighbor for nucleotide in "ACGT")
    return result
# MotifEnumeration(Dna, k, d)
#     Patterns ← an empty set
#     for each k-mer Pattern in the first string in Dna
#         for each k-mer Pattern' differing from Pattern by at most d mismatches
#             if Pattern' appears in each string from Dna with at most d mismatches
#                 add Pattern' to Patterns
#     remove duplicates from Patterns
#     return Patterns
def MotifEnumeration(dna, k, d):
    """Find every (k, d)-motif shared by all strings in ``dna``.

    A k-mer is a (k, d)-motif when it occurs in each string of ``dna`` with
    at most ``d`` mismatches. For each strand, the set of k-mers within
    ``d`` mismatches of any of its length-``k`` windows is collected; the
    motifs are exactly the k-mers present in all of those sets.

    Args:
        dna: Iterable collection of DNA strings.
        k: Motif length.
        d: Maximum number of mismatches allowed per occurrence.

    Returns:
        A sorted list of the distinct motifs. The list is empty when
        ``dna`` is empty, ``k`` is not positive, or no k-mer qualifies
        (for instance when a strand is shorter than ``k``).

    Example:
        >>> MotifEnumeration(["ATTTGGC", "TGCCTTA", "CGGTATC", "GAAAATT"], 3, 1)
        ['ATA', 'ATT', 'GTT', 'TTT']
    """
    if not dna or k <= 0:
        return []

    # One set per strand: every k-mer that appears in that strand with at
    # most d mismatches.
    reachable_per_strand = []
    for strand in dna:
        reachable = set()
        for start in range(len(strand) - k + 1):
            reachable.update(neighbors(strand[start:start + k], d))
        reachable_per_strand.append(reachable)

    # A motif must be reachable from every strand. Sorting gives callers a
    # deterministic order.
    return sorted(set.intersection(*reachable_per_strand))
# Demo run: look for 3-mers that occur in every strand with at most one
# mismatch.
k, d = 3, 1
t = ["ATTTGGC", "TGCCTTA", "CGGTATC", "GAAAATT"]
print(MotifEnumeration(t, k, d))
# To inspect a single neighbourhood instead: print(neighbors(t[0], 1))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment