Skip to content

Instantly share code, notes, and snippets.

@KHerb
Last active July 20, 2016 20:41
Show Gist options
  • Save KHerb/b3ec7ab1de867b19c9457734345fc20d to your computer and use it in GitHub Desktop.
Save KHerb/b3ec7ab1de867b19c9457734345fc20d to your computer and use it in GitHub Desktop.
Compares amino-acid (AA) sequences of homologous proteins (e.g. mutant vs. wild-type)
__AUTHOR__ = 'KARL HERBINE'

# Three-letter residue names (as they appear in PDB SEQRES records) mapped to
# their one-letter amino-acid codes.  Only the 20 standard residues are listed.
AA = {
    'LYS': 'K', 'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D',
    'CYS': 'C', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I',
    'LEU': 'L', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S',
    'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V', 'GLN': 'Q',
}

# Chain identifiers the script knows about; a chain's position in this list
# is also its slot in the per-file sequence lists.
CHAINS = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# `raw_input` only exists on Python 2; on Python 3 the equivalent is `input`.
try:
    _prompt = raw_input
except NameError:
    _prompt = input

# Every print below passes exactly one string in parentheses, which behaves
# identically as a Python 2 print statement and a Python 3 function call.
print('Amino Acid Sequence differences'.center(100, ' '))
print('This Python script is useful for comparing AA sequences between two polypeptides (eg. Mutant vs Wild-Type)')
print('NOTE: SOME HOMOLOGOUS STRUCTURES MAY NOT CONTAIN SAME NUMBER OF RESIDUES'.center(100, ' '))

# Paths of the two PDB files to compare, as typed by the user.
pdb1 = _prompt("Enter the name of the first pdb file (name.pdb): ")
pdb2 = _prompt("Enter the name of the second pdb file (name.pdb): ")
def _read_seqres_chains(path):
    """Return the one-letter sequence of every chain listed in a PDB file.

    The file is read once.  Only records whose first field is ``SEQRES`` are
    used; the third whitespace-separated field of such a record is taken as
    the chain identifier and the remaining fields as residue names.

    Args:
        path: Path of the PDB file to read.

    Returns:
        A list with one string per entry of ``CHAINS`` (same order).  A chain
        that does not occur in the file yields an empty string.

    Raises:
        IOError / OSError: If ``path`` cannot be opened.
    """
    slot_for_chain = dict((chain_id, slot) for slot, chain_id in enumerate(CHAINS))
    residues = [[] for _ in CHAINS]
    with open(path, 'r') as handle:
        for line in handle:
            fields = line.split()
            # Require the record name itself to be SEQRES so that other
            # records merely mentioning the word (e.g. REMARK text) are not
            # mistaken for sequence data.
            if len(fields) < 3 or fields[0] != 'SEQRES':
                continue
            slot = slot_for_chain.get(fields[2])
            if slot is None:
                # Chain identifier missing or not one of CHAINS.
                continue
            # Residue names that are not keys of AA are skipped, so they do
            # not contribute a letter to the sequence.
            residues[slot].extend(AA[name] for name in fields[3:] if name in AA)
    return [''.join(codes) for codes in residues]


# Per-chain sequences of the two input files, indexed like CHAINS.
chain1 = _read_seqres_chains(pdb1)
chain2 = _read_seqres_chains(pdb2)
# One-letter codes grouped by side-chain character.  The aromatic group
# overlaps the polar (Y) and non-polar (W, F) groups, which is why the order
# of the checks in _SUBSTITUTION_RULES matters.
_POLAR = 'STCYNQ'
_NON_POLAR = 'GAVLIMFWP'
_ACID = 'ED'
_BASE = 'KRH'
_AROMATIC = 'WFY'

# Message endings describing the replacement residue.
_TO_AROMATIC = ' which has an AROMATIC R-group'
_TO_ACID = ', a POLAR aa with an ACIDIC R-group'
_TO_BASE = ', a POLAR aa with a BASIC R-group'
_TO_NON_POLAR = ', a NON-POLAR aa'
_TO_POLAR = ', a POLAR aa'

# Each rule is (group of the original residue, label printed for it, ordered
# (group of the replacement residue, message ending) pairs).  Rules and pairs
# are tried in the order written; the first match wins.
_SUBSTITUTION_RULES = (
    (_POLAR, 'POLAR', (
        (_AROMATIC, _TO_AROMATIC),
        (_ACID, _TO_ACID),
        (_NON_POLAR, _TO_NON_POLAR),
        (_BASE, _TO_BASE),
    )),
    (_NON_POLAR, 'NON-POLAR', (
        (_AROMATIC, _TO_AROMATIC),
        (_ACID, _TO_ACID),
        (_BASE, _TO_BASE),
        (_POLAR, _TO_POLAR),
    )),
    (_AROMATIC, 'AROMATIC', (
        (_BASE, _TO_BASE),
        (_ACID, _TO_ACID),
        (_NON_POLAR, _TO_NON_POLAR),
        (_POLAR, _TO_POLAR),
    )),
    (_BASE, 'BASIC', (
        # The trailing space is part of the established output text.
        (_AROMATIC, _TO_AROMATIC + ' '),
        (_ACID, _TO_ACID),
        (_NON_POLAR, _TO_NON_POLAR),
        (_POLAR, _TO_POLAR),
    )),
    (_ACID, 'ACIDIC', (
        # The trailing space is part of the established output text.
        (_AROMATIC, _TO_AROMATIC + ' '),
        (_BASE, _TO_BASE),
        (_NON_POLAR, _TO_NON_POLAR),
        (_POLAR, _TO_POLAR),
    )),
)


def _describe_substitution(old, new, position):
    """Return the sentence describing how `old` -> `new` changes the R-group.

    `position` is the 1-based residue number used in the message.  Returns
    None when the original residue's group applies but the replacement
    belongs to none of the listed groups (nothing is printed in that case).
    """
    for members, label, targets in _SUBSTITUTION_RULES:
        if old in members and new not in members:
            for target_members, ending in targets:
                if new in target_members:
                    return ('Amino Acid %s%d is %s and has been substituted for %s%d%s'
                            % (old, position, label, new, position, ending))
            return None
    # No rule fired: both residues fall in the same group(s).
    return ('Amino Acid %s%d has been substituted for %s%d with an equally functional R-Group'
            % (old, position, new, position))


def sequences(s1, s2):
    """Print a position-by-position comparison of two one-letter sequences.

    Positions are compared pairwise up to the length of the shorter sequence.
    The report lists each substitution, the total count, and, for every
    substitution, how the side-chain class changed.  When the sequences have
    different lengths this is stated explicitly, and they are never reported
    as identical.

    Args:
        s1: First sequence (one-letter amino-acid codes).
        s2: Second sequence (one-letter amino-acid codes).

    Returns:
        The list of 0-based positions at which `s1` and `s2` differ.
    """
    compared = min(len(s1), len(s2))
    nomatch = [i for i in range(compared) if s1[i] != s2[i]]
    same_length = len(s1) == len(s2)
    length_note = ('NOTE: sequence lengths differ (%d vs %d); only the first %d residues were compared.'
                   % (len(s1), len(s2), compared))

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    for i in nomatch:
        print('Amino Acid: %s%d substituted for %s%d' % (s1[i], i + 1, s2[i], i + 1))

    print('')
    print("Total Amino Acid Substitutions".center(60, '-'))
    # Format first, then centre, so the number itself is what gets centred.
    print(str(len(nomatch)).center(60, ' '))

    if not nomatch:
        if same_length:
            print("These two sequences are identical.")
        else:
            # Equal over the compared prefix only; do not claim identity.
            print(length_note)
        print('')
        return nomatch

    print("".center(60, '-'))
    if not same_length:
        print(length_note)
    print('')
    for i in nomatch:
        description = _describe_substitution(s1[i], s2[i], i + 1)
        if description is not None:
            print(description)
    return nomatch
def chains():
    """Print the comparison report for every chain found in the first file.

    Walks the module-level `chain1` list slot by slot.  Each slot holding
    more than one residue is reported under the chain letter at the same
    position in `CHAINS`, together with the matching slot of `chain2`, and
    the pair is then passed to `sequences` for the detailed comparison.
    Slots with fewer than two residues in `chain1` are skipped.
    """
    rule = ''.center(60, '-')
    # Iterate over real slot positions rather than a running count of
    # non-empty chains, so the heading and both sequences stay aligned even
    # when an earlier chain letter is absent from the first file.
    for slot, first_seq in enumerate(chain1):
        if len(first_seq) <= 1:
            continue
        second_seq = chain2[slot]
        chain_id = CHAINS[slot]

        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print(rule)
        # Format first, then centre, so the finished heading is centred.
        print(('CHAIN %s' % chain_id).center(60, ' '))
        print(rule)
        print("Total Amino Acids in %s is: %d" % (pdb1, len(first_seq)))
        print("Total Amino Acids in %s is: %d" % (pdb2, len(second_seq)))
        print('')
        print("Amino Acid Sequence for CHAIN %s in %s: %s" % (chain_id, pdb1, first_seq))
        print("Amino Acid Sequence for CHAIN %s in %s: %s" % (chain_id, pdb2, second_seq))
        print('')
        sequences(first_seq, second_seq)
# Script entry point: print the per-chain comparison report.
chains()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment