Created
December 16, 2019 15:46
-
-
Save a11ce/cc40e49f38e61b9d2b62c1b6f0f5724a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def main():
    """Align two hard-coded sequences and print the alignment and both tables.

    Prints the tuple returned by ``transcribe`` followed by the score table
    and the traceback table produced by ``makeFT``.
    """
    # Swap in "z" / "ab" here for a minimal test case.
    horizontal = "HEAGAWGHEE"
    vertical = "PAWHEAE"

    # Match-run table; only consumed by the optional debugging display below.
    match_runs = makeSameOne(horizontal, vertical)
    # showTable(horizontal, vertical, match_runs, hideZero=True)

    scores, moves = makeFT(horizontal, vertical)
    alignment = transcribe(horizontal, vertical, scores, moves)
    print(alignment)

    for table in (scores, moves):
        showTable(horizontal, vertical, table)
def transcribe(hSeq, vSeq, f, t):
    """Walk the traceback table from the bottom-right cell and build the alignment.

    Args:
        hSeq: Sequence laid out along the columns of ``f`` and ``t``.
        vSeq: Sequence laid out along the rows of ``f`` and ``t``.
        f: Score table with ``len(vSeq) + 1`` rows and ``len(hSeq) + 1`` columns.
        t: Traceback table of the same shape. Each cell is either the string
            ``"END"`` or an integer move code whose tens digit is the number
            of columns to step back and whose ones digit is the number of
            rows to step back (so 11 = diagonal, 10 = left, 1 = up).

    Returns:
        A tuple ``(hOut, vOut, score)``: the two aligned strings, with ``"-"``
        wherever a sequence was not advanced, and the score read from the
        bottom-right cell of ``f``.
    """
    row, col = len(vSeq), len(hSeq)
    score = f[row][col]

    # Characters are collected back-to-front and reversed once at the end,
    # which avoids re-copying the output string on every step.
    h_chars = []
    v_chars = []
    while True:
        move = t[row][col]
        if move == "END":
            break
        col_step, row_step = divmod(move, 10)
        # The character consumed by a step is the one just before the
        # current table position in the corresponding sequence.
        h_chars.append(hSeq[col - 1] if col_step == 1 else "-")
        v_chars.append(vSeq[row - 1] if row_step == 1 else "-")
        row -= row_step
        col -= col_step

    hOut = "".join(reversed(h_chars))
    vOut = "".join(reversed(v_chars))
    return hOut, vOut, score
def consume(s):
    """Split off the final element of ``s``.

    Returns a pair ``(last, rest)`` where ``last`` is ``s[-1]`` and ``rest``
    is everything before it. Indexing an empty ``s`` raises ``IndexError``.
    """
    last = s[-1]
    rest = s[:-1]
    return last, rest
def makeBlank(hSeq, vSeq):
    """Return a zero-filled table with one row per ``vSeq`` item and one column per ``hSeq`` item."""
    width = len(hSeq)
    height = len(vSeq)
    # Each row is built separately so that rows never share a list object.
    return [[0] * width for _ in range(height)]
def makeSameOne(hSeq, vSeq):
    """Build a table of diagonal match-run lengths between two sequences.

    The table has one row per item of ``vSeq`` and one column per item of
    ``hSeq``. A cell is 0 where the two items differ; where they are equal it
    is one more than the cell diagonally up-left of it, or 1 when the cell
    lies in the first row or first column and has no such neighbour.

    Args:
        hSeq: Sequence indexed by column.
        vSeq: Sequence indexed by row.

    Returns:
        A list of ``len(vSeq)`` rows, each a list of ``len(hSeq)`` integers.
    """
    table = [[0] * len(hSeq) for _ in range(len(vSeq))]
    # Columns are the outer loop, so the up-left neighbour (previous column)
    # is always final by the time it is read.
    for col, h_item in enumerate(hSeq):
        for row, v_item in enumerate(vSeq):
            if h_item != v_item:
                continue
            if row > 0 and col > 0:
                table[row][col] = table[row - 1][col - 1] + 1
            else:
                # On the top/left border there is no diagonal predecessor;
                # indexing with -1 would wrap around to the far edge.
                table[row][col] = 1
    return table
def makeFT(hSeq, vSeq, d=8, blosum=None):
    """Fill the score and traceback tables for a global alignment.

    Both tables have ``len(vSeq) + 1`` rows and ``len(hSeq) + 1`` columns.
    The first row and column hold accumulated gap penalties; every other cell
    takes the best of a diagonal step (substitution score added), a step from
    the row above (gap penalty subtracted) and a step from the column to the
    left (gap penalty subtracted).

    Args:
        hSeq: Sequence laid out along the columns.
        vSeq: Sequence laid out along the rows.
        d: Linear gap penalty subtracted for each gap step. Defaults to 8.
        blosum: Substitution scores as a nested mapping, looked up as
            ``blosum[h_item][v_item]``. When ``None`` the matrix is obtained
            from ``loadBlosum()``.

    Returns:
        A tuple ``(f, t)``. ``f`` is the score table. ``t`` is the traceback
        table: ``"END"`` at ``[0][0]``, otherwise the move code 11 (diagonal),
        1 (from the row above) or 10 (from the column to the left).
    """
    if blosum is None:
        blosum = loadBlosum()

    n_rows = len(vSeq) + 1
    n_cols = len(hSeq) + 1
    f = [[0] * n_cols for _ in range(n_rows)]
    t = [[" "] * n_cols for _ in range(n_rows)]
    t[0][0] = "END"

    # Borders: the only way to reach them is through consecutive gaps.
    for row in range(1, n_rows):
        f[row][0] = f[row - 1][0] - d
        t[row][0] = 1
    for col in range(1, n_cols):
        f[0][col] = f[0][col - 1] - d
        t[0][col] = 10

    # Move codes listed in candidate order: diagonal, up, left.
    move_codes = (11, 1, 10)
    for row in range(1, n_rows):
        v_item = vSeq[row - 1]
        for col in range(1, n_cols):
            diagonal = f[row - 1][col - 1] + blosum[hSeq[col - 1]][v_item]
            from_above = f[row - 1][col] - d
            from_left = f[row][col - 1] - d
            # Pairing each score with its position makes ties resolve to the
            # later candidate (left, then up, then diagonal).
            best, which = max((diagonal, 0), (from_above, 1), (from_left, 2))
            f[row][col] = best
            t[row][col] = move_codes[which]
    return f, t
def loadBlosum(path="df_blosum50.csv"):
    """Read a substitution matrix from a CSV file into a nested dict.

    The first line is a header whose first cell is ignored and whose remaining
    cells are column labels. Every following non-blank line supplies a row
    label in its first cell and integer scores in the remaining cells.

    Args:
        path: Location of the CSV file. Defaults to ``"df_blosum50.csv"``
            relative to the current working directory.

    Returns:
        A dict mapping each row label to a dict of ``{column label: int}``.
        An empty file yields an empty dict.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a score cell is not a valid integer.
    """
    import csv  # function-scope import: the file has no module-level imports

    blosum = {}
    with open(path, newline="") as f:
        reader = csv.reader(f)
        header = next(reader, None)
        if header is None:
            return blosum
        head = [label.strip() for label in header[1:]]
        for row in reader:
            # Skip blank lines (typically a trailing newline at end of file)
            # instead of recording them under a whitespace key.
            if not any(cell.strip() for cell in row):
                continue
            blosum[row[0]] = dict(zip(head, (int(x) for x in row[1:])))
    return blosum
def showTable(hSeq, vSeq, data, hideZero=False):
    """Print ``data`` as a grid labelled with the two sequences.

    The first printed line lists the items of ``hSeq``. Each following line
    starts with a row label (a blank for the first row, then successive items
    of ``vSeq``) followed by the cells of the matching row of ``data``.

    Args:
        hSeq: Items printed across the header line.
        vSeq: Items used as row labels from the second row onwards.
        data: Table to print; rows ``0 .. len(vSeq)`` are read, so it must
            have at least ``len(vSeq) + 1`` rows.
        hideZero: When true, cells equal to 0 are printed as a blank instead
            of ``"0 "``.
    """
    print(" ", end="")
    for item in hSeq:
        print(item, end=" ")
    print()

    for n in range(len(vSeq) + 1):
        label = vSeq[n - 1] if n > 0 else " "
        print(label, end=" ")
        for cell in data[n]:
            if cell == 0:
                shown = " " if hideZero else "0 "
            else:
                shown = cell
            print(shown, end=" ")
        print()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment