Last active
July 20, 2016 20:41
-
-
Save KHerb/b3ec7ab1de867b19c9457734345fc20d to your computer and use it in GitHub Desktop.
Compares the amino-acid (AA) sequences of homologous proteins (e.g., mutant vs. wild-type)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__AUTHOR__ = 'KARL HERBINE'

# Three-letter residue names mapped to their one-letter amino-acid codes.
AA = {
    'LYS': 'K', 'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D',
    'CYS': 'C', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I',
    'LEU': 'L', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S',
    'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V', 'GLN': 'Q',
}
# Chain identifiers the script recognises.  Position i of chain1/chain2 holds
# the sequence collected for chain CHAINS[i].
CHAINS = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

print('Amino Acid Sequence differences'.center(100, ' '))
print('This Python script is useful for comparing AA sequences between two polypeptides (eg. Mutant vs Wild-Type)')
print('NOTE: SOME HOMOLOGOUS STRUCTURES MAY NOT CONTAIN SAME NUMBER OF RESIDUES'.center(100, ' '))

# raw_input only exists on Python 2; fall back to input on Python 3 so the
# prompt always returns the typed text without evaluating it.
try:
    _prompt = raw_input
except NameError:
    _prompt = input

pdb1 = str(_prompt("Enter the name of the first pdb file (name.pdb): "))
pdb2 = str(_prompt("Enter the name of the second pdb file (name.pdb): "))


def _read_seqres_chains(path):
    """Collect the SEQRES sequence of every chain in CHAINS from one PDB file.

    The file is read once.  Only lines whose first field is ``SEQRES`` are
    used; the third whitespace-separated field is taken as the chain
    identifier and the fields after the residue count as residue names.
    Residue names that are not keys of ``AA`` are skipped, so they do not
    occupy a position in the returned sequence.

    Args:
        path: Name of the PDB file to open.

    Returns:
        A list with one string per entry of ``CHAINS`` (same order); chains
        that do not occur in the file are empty strings.
    """
    index_of = dict((chain_id, idx) for idx, chain_id in enumerate(CHAINS))
    collected = [[] for _ in CHAINS]
    with open(path, 'r') as pdb_file:
        for line in pdb_file:
            if not line.startswith('SEQRES'):
                continue
            # Layout: SEQRES <serial> <chain> <residue count> <residue> ...
            fields = line.split()
            if len(fields) < 4 or fields[0] != 'SEQRES':
                continue
            if fields[2] not in index_of:
                continue
            codes = collected[index_of[fields[2]]]
            codes.extend(AA[name] for name in fields[4:] if name in AA)
    return [''.join(codes) for codes in collected]


chain1 = _read_seqres_chains(pdb1)
chain2 = _read_seqres_chains(pdb2)
# Residue classes as one-letter codes.  The aromatic class overlaps the polar
# class (Y) and the non-polar class (W, F), so the order in which the classes
# are tested below decides which description a residue receives.
_POLAR = 'STCYNQ'
_NON_POLAR = 'GAVLIMFWP'
_ACID = 'ED'
_BASE = 'KRH'
_AROMATIC = 'WFY'

# Classes tested, in order, against the residue from the first sequence:
# (label, members, text appended after the AROMATIC description).
# The BASIC and ACIDIC aromatic messages end with a single trailing space;
# it is kept so the emitted text stays unchanged.
_ORIGINAL_CLASSES = (
    ('POLAR', _POLAR, ''),
    ('NON-POLAR', _NON_POLAR, ''),
    ('AROMATIC', _AROMATIC, ''),
    ('BASIC', _BASE, ' '),
    ('ACIDIC', _ACID, ' '),
)

# Classes tested, in order, against the residue from the second sequence.
# Aromatic comes first so W/F/Y are described as aromatic rather than by
# their polar or non-polar membership.
_REPLACEMENT_CLASSES = (
    (_AROMATIC, ' which has an AROMATIC R-group'),
    (_ACID, ', a POLAR aa with an ACIDIC R-group'),
    (_BASE, ', a POLAR aa with a BASIC R-group'),
    (_NON_POLAR, ', a NON-POLAR aa'),
    (_POLAR, ', a POLAR aa'),
)


def _describe_substitution(original, replacement, position):
    """Return the class-change message for one substitution, or None.

    ``position`` is the 1-based residue number used in the message.  None is
    returned when ``original`` changes class but ``replacement`` belongs to
    none of the known classes, in which case nothing is reported.
    """
    for label, members, aromatic_tail in _ORIGINAL_CLASSES:
        if original in members and replacement not in members:
            for candidates, description in _REPLACEMENT_CLASSES:
                if replacement in candidates:
                    tail = aromatic_tail if candidates is _AROMATIC else ''
                    return "Amino Acid %s%d is %s and has been substituted for %s%d%s%s" % (
                        original, position, label,
                        replacement, position, description, tail)
            return None
    # The two residues share every class the first one belongs to.
    return "Amino Acid %s%d has been substituted for %s%d with an equally functional R-Group" % (
        original, position, replacement, position)


def sequences(s1, s2):
    """Print a position-by-position comparison of two one-letter sequences.

    Residues are compared index by index over the length of the shorter
    sequence.  The report lists every differing position, the total number
    of differences and, for each one, how the residue class changed.

    The sequences are only called identical when they also have the same
    length; otherwise a note states how many residues were compared, since
    the extra residues of the longer sequence are not examined.

    Args:
        s1: First sequence (one-letter codes).
        s2: Second sequence (one-letter codes).

    Returns:
        None.  All results are written to standard output.
    """
    compared = min(len(s1), len(s2))
    nomatch = [i for i in range(compared) if s1[i] != s2[i]]

    for i in nomatch:
        print('Amino Acid: %s%d substituted for %s%d' % (s1[i], i + 1, s2[i], i + 1))

    print("Total Amino Acid Substitutions".center(60, '-'))
    # Format first, then centre, so multi-digit counts are centred too.
    print(str(len(nomatch)).center(60, ' '))

    same_length = len(s1) == len(s2)
    if not nomatch and same_length:
        print("These two sequences are identical.")
    if not same_length:
        print("NOTE: sequences differ in length (%d vs %d); only the first %d residues were compared."
              % (len(s1), len(s2), compared))
    if not nomatch:
        return

    print("".center(60, '-'))
    for i in nomatch:
        message = _describe_substitution(s1[i], s2[i], i + 1)
        if message is not None:
            print(message)
def chains():
    """Print a comparison report for every chain found in the first PDB file.

    Reads the module-level ``chain1`` and ``chain2`` lists, which are indexed
    in parallel with ``CHAINS``.  A chain is reported when its sequence in
    ``chain1`` has more than one residue.  For each such chain the residue
    counts and sequences from both files are printed, followed by the
    detailed comparison produced by ``sequences``.

    Returns:
        None.  All results are written to standard output.
    """
    for a, sequence in enumerate(chain1):
        # Select by the chain's own index rather than by a running count, so
        # populated chains are found even when earlier letters are unused.
        if len(sequence) <= 1:
            continue
        chain_id = CHAINS[a]
        print(''.center(60, '-'))
        # Format first, then centre, so the title is padded to the full width.
        print(('CHAIN %s' % chain_id).center(60, ' '))
        print(''.center(60, '-'))
        print("Total Amino Acids in %s is: %d" % (pdb1, len(chain1[a])))
        print("Total Amino Acids in %s is: %d" % (pdb2, len(chain2[a])))
        print("Amino Acid Sequence for CHAIN %s in %s: %s" % (chain_id, pdb1, chain1[a]))
        print("Amino Acid Sequence for CHAIN %s in %s: %s" % (chain_id, pdb2, chain2[a]))
        sequences(chain1[a], chain2[a])


# Run the report as soon as both files have been parsed.
chains()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment