-
-
Save PatWalters/c046fee2760e6894ed13e19b8c99193b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
import sys | |
from prody import * | |
from rdkit import Chem | |
from rdkit.Chem import AllChem | |
from io import StringIO | |
import pypdb | |
def get_pdb_components(pdb_id): | |
""" | |
Split a protein-ligand pdb into protein and ligand components | |
:param pdb_id: | |
:return: | |
""" | |
pdb = parsePDB(pdb_id) | |
protein = pdb.select('protein') | |
ligand = pdb.select('not protein and not water') | |
return protein, ligand | |
def process_ligand(ligand, res_name): | |
""" | |
Add bond orders to a pdb ligand | |
1. Select the ligand component with name "res_name" | |
2. Get the corresponding SMILES from pypdb | |
3. Create a template molecule from the SMILES in step 2 | |
4. Write the PDB file to a stream | |
5. Read the stream into an RDKit molecule | |
6. Assign the bond orders from the template from step 3 | |
:param ligand: ligand as generated by prody | |
:param res_name: residue name of ligand to extract | |
:return: molecule with bond orders assigned | |
""" | |
output = StringIO() | |
sub_mol = ligand.select(f"resname {res_name}") | |
chem_desc = pypdb.describe_chemical(f"{res_name}") | |
sub_smiles = chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"] | |
template = AllChem.MolFromSmiles(sub_smiles) | |
writePDBStream(output, sub_mol) | |
pdb_string = output.getvalue() | |
rd_mol = AllChem.MolFromPDBBlock(pdb_string) | |
new_mol = AllChem.AssignBondOrdersFromTemplate(template, rd_mol) | |
return new_mol | |
def write_pdb(protein, pdb_name): | |
""" | |
Write a prody protein to a pdb file | |
:param protein: protein object from prody | |
:param pdb_name: base name for the pdb file | |
:return: None | |
""" | |
output_pdb_name = f"{pdb_name}_protein.pdb" | |
writePDB(f"{output_pdb_name}", protein) | |
print(f"wrote {output_pdb_name}") | |
def write_sdf(new_mol, pdb_name, res_name): | |
""" | |
Write an RDKit molecule to an SD file | |
:param new_mol: | |
:param pdb_name: | |
:param res_name: | |
:return: | |
""" | |
outfile_name = f"{pdb_name}_{res_name}_ligand.sdf" | |
writer = Chem.SDWriter(f"{outfile_name}") | |
writer.write(new_mol) | |
print(f"wrote {outfile_name}") | |
def main(pdb_name): | |
""" | |
Read Ligand Expo data, split pdb into protein and ligands, | |
write protein pdb, write ligand sdf files | |
:param pdb_name: id from the pdb, doesn't need to have an extension | |
:return: | |
""" | |
protein, ligand = get_pdb_components(pdb_name) | |
write_pdb(protein, pdb_name) | |
res_name_list = list(set(ligand.getResnames())) | |
for res in res_name_list: | |
new_mol = process_ligand(ligand, res) | |
write_sdf(new_mol, pdb_name, res) | |
if __name__ == "__main__": | |
if len(sys.argv) == 2: | |
main(sys.argv[1]) | |
else: | |
print("Usage: {sys.argv[1]} pdb_id", file=sys.stderr) |
So it turns out that the describe chemical function has been commented out of the pypdb. maby try uncommenting it?
Dear Dr. Walters,
Thank you for sharing this script; it works like a charm! The only thing I had to update was due to the changes in pypdb. Instead of
sub_smiles = chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"]
I used
sub_smiles = None
for item in chem_desc.get('pdbx_chem_comp_descriptor', []):
if item.get('type') == 'SMILES':
sub_smiles = item.get('descriptor')
break
Thank you for your great work.
I have a question that in the process_ligand function, why we do not directly use sub_mol instead of new_mol? Is it will change the coordinate of the extracted ligand?
I need to keep the 3D structure of ligand (conformer) when splitting. How can we do that?
The variable sub_mol is a Prody molecule. To assign bond orders, we need to convert this to an RDKit molecule. To this, we first convert the prody_ligand molecule to a PDB string, then parse the PDB string with the RDKit. I've added a commented version of parse_ligand below. Note that new_mol contains the coordinates from the original pdb.
# create a StringIO object that will hold the PDB string
output = StringIO()
# select the Prody molecule with the ligand
sub_mol = ligand.select(f"resname {res_name}")
# get the SMILES for the ligand
chem_desc = pypdb.describe_chemical(f"{res_name}")
sub_smiles = chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"]
# create an RDKit molecule with bond orders from the SMILES
template = AllChem.MolFromSmiles(sub_smiles)
# write the Prody molecule as a PDB to a string
writePDBStream(output, sub_mol)
# get the pdb string from the StringIO object
pdb_string = output.getvalue()
# create an RDKit molecule from the pdb string
rd_mol = AllChem.MolFromPDBBlock(pdb_string)
# use the template molecule to assign bond orders
new_mol = AllChem.AssignBondOrdersFromTemplate(template, rd_mol)
# return the molecule with bond orders
return new_mol
Hello Mr. Walter.
While I tried to implement this piece of code, I was stopped by the following error
module 'pypdb' has no attribute 'describe_chemical'
I see that it has been 3 years since you have posted this code.
Looks like things have changed. Can you help me?