Created
October 2, 2018 01:20
-
-
Save PatWalters/c046fee2760e6894ed13e19b8c99193b to your computer and use it in GitHub Desktop.
An improved script to extract a ligand from a protein-ligand complex and assign bond orders
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
from prody import * | |
from rdkit import Chem | |
from rdkit.Chem import AllChem | |
from io import StringIO | |
import pypdb | |
def get_pdb_components(pdb_id): | |
""" | |
Split a protein-ligand pdb into protein and ligand components | |
:param pdb_id: | |
:return: | |
""" | |
pdb = parsePDB(pdb_id) | |
protein = pdb.select('protein') | |
ligand = pdb.select('not protein and not water') | |
return protein, ligand | |
def process_ligand(ligand, res_name): | |
""" | |
Add bond orders to a pdb ligand | |
1. Select the ligand component with name "res_name" | |
2. Get the corresponding SMILES from pypdb | |
3. Create a template molecule from the SMILES in step 2 | |
4. Write the PDB file to a stream | |
5. Read the stream into an RDKit molecule | |
6. Assign the bond orders from the template from step 3 | |
:param ligand: ligand as generated by prody | |
:param res_name: residue name of ligand to extract | |
:return: molecule with bond orders assigned | |
""" | |
output = StringIO() | |
sub_mol = ligand.select(f"resname {res_name}") | |
chem_desc = pypdb.describe_chemical(f"{res_name}") | |
sub_smiles = chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"] | |
template = AllChem.MolFromSmiles(sub_smiles) | |
writePDBStream(output, sub_mol) | |
pdb_string = output.getvalue() | |
rd_mol = AllChem.MolFromPDBBlock(pdb_string) | |
new_mol = AllChem.AssignBondOrdersFromTemplate(template, rd_mol) | |
return new_mol | |
def write_pdb(protein, pdb_name): | |
""" | |
Write a prody protein to a pdb file | |
:param protein: protein object from prody | |
:param pdb_name: base name for the pdb file | |
:return: None | |
""" | |
output_pdb_name = f"{pdb_name}_protein.pdb" | |
writePDB(f"{output_pdb_name}", protein) | |
print(f"wrote {output_pdb_name}") | |
def write_sdf(new_mol, pdb_name, res_name): | |
""" | |
Write an RDKit molecule to an SD file | |
:param new_mol: | |
:param pdb_name: | |
:param res_name: | |
:return: | |
""" | |
outfile_name = f"{pdb_name}_{res_name}_ligand.sdf" | |
writer = Chem.SDWriter(f"{outfile_name}") | |
writer.write(new_mol) | |
print(f"wrote {outfile_name}") | |
def main(pdb_name): | |
""" | |
Read Ligand Expo data, split pdb into protein and ligands, | |
write protein pdb, write ligand sdf files | |
:param pdb_name: id from the pdb, doesn't need to have an extension | |
:return: | |
""" | |
protein, ligand = get_pdb_components(pdb_name) | |
write_pdb(protein, pdb_name) | |
res_name_list = list(set(ligand.getResnames())) | |
for res in res_name_list: | |
new_mol = process_ligand(ligand, res) | |
write_sdf(new_mol, pdb_name, res) | |
if __name__ == "__main__": | |
if len(sys.argv) == 2: | |
main(sys.argv[1]) | |
else: | |
print("Usage: {sys.argv[1]} pdb_id", file=sys.stderr) |
Thank you for your great work.
I have a question that in the process_ligand function, why we do not directly use sub_mol instead of new_mol? Is it will change the coordinate of the extracted ligand?
I need to keep the 3D structure of ligand (conformer) when splitting. How can we do that?
The variable sub_mol is a Prody molecule. To assign bond orders, we need to convert this to an RDKit molecule. To this, we first convert the prody_ligand molecule to a PDB string, then parse the PDB string with the RDKit. I've added a commented version of parse_ligand below. Note that new_mol contains the coordinates from the original pdb.
# create a StringIO object that will hold the PDB string
output = StringIO()
# select the Prody molecule with the ligand
sub_mol = ligand.select(f"resname {res_name}")
# get the SMILES for the ligand
chem_desc = pypdb.describe_chemical(f"{res_name}")
sub_smiles = chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"]
# create an RDKit molecule with bond orders from the SMILES
template = AllChem.MolFromSmiles(sub_smiles)
# write the Prody molecule as a PDB to a string
writePDBStream(output, sub_mol)
# get the pdb string from the StringIO object
pdb_string = output.getvalue()
# create an RDKit molecule from the pdb string
rd_mol = AllChem.MolFromPDBBlock(pdb_string)
# use the template molecule to assign bond orders
new_mol = AllChem.AssignBondOrdersFromTemplate(template, rd_mol)
# return the molecule with bond orders
return new_mol
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Dear Dr. Walters,
Thank you for sharing this script; it works like a charm! The only thing I had to update was due to the changes in pypdb. Instead of
sub_smiles = chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"]
I used