PatWalters · October 2, 2018 01:20 · iamkaant · Oct 19, 2023 · mainguyenanhvu · May 13, 2025
diff --git a/split_complex_v2.py b/split_complex_v2.py
 #!/usr/bin/env python

 import sys
 from prody import *
 from rdkit import Chem
 from rdkit.Chem import AllChem
 from io import StringIO
 import pypdb


 def get_pdb_components(pdb_id):
    """
    Split a protein-ligand pdb into protein and ligand components
    :param pdb_id:
    :return:
    """
    pdb = parsePDB(pdb_id)
    protein = pdb.select('protein')
    ligand = pdb.select('not protein and not water')
    return protein, ligand


 def process_ligand(ligand, res_name):
    """
    Add bond orders to a pdb ligand
    1. Select the ligand component with name "res_name"
    2. Get the corresponding SMILES from pypdb
    3. Create a template molecule from the SMILES in step 2
    4. Write the PDB file to a stream
    5. Read the stream into an RDKit molecule
    6. Assign the bond orders from the template from step 3
    :param ligand: ligand as generated by prody
    :param res_name: residue name of ligand to extract
    :return: molecule with bond orders assigned
    """
    output = StringIO()
    sub_mol = ligand.select(f"resname {res_name}")
    chem_desc = pypdb.describe_chemical(f"{res_name}")
    sub_smiles = chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"]
    template = AllChem.MolFromSmiles(sub_smiles)
    writePDBStream(output, sub_mol)
    pdb_string = output.getvalue()
    rd_mol = AllChem.MolFromPDBBlock(pdb_string)
    new_mol = AllChem.AssignBondOrdersFromTemplate(template, rd_mol)
    return new_mol


 def write_pdb(protein, pdb_name):
    """
    Write a prody protein to a pdb file
    :param protein: protein object from prody
    :param pdb_name: base name for the pdb file
    :return: None
    """
    output_pdb_name = f"{pdb_name}_protein.pdb"
    writePDB(f"{output_pdb_name}", protein)
    print(f"wrote {output_pdb_name}")


 def write_sdf(new_mol, pdb_name, res_name):
    """
    Write an RDKit molecule to an SD file
    :param new_mol:
    :param pdb_name:
    :param res_name:
    :return:
    """
    outfile_name = f"{pdb_name}_{res_name}_ligand.sdf"
    writer = Chem.SDWriter(f"{outfile_name}")
    writer.write(new_mol)
    print(f"wrote {outfile_name}")


 def main(pdb_name):
    """
    Read Ligand Expo data, split pdb into protein and ligands,
    write protein pdb, write ligand sdf files
    :param pdb_name: id from the pdb, doesn't need to have an extension
    :return:
    """
    protein, ligand = get_pdb_components(pdb_name)
    write_pdb(protein, pdb_name)

    res_name_list = list(set(ligand.getResnames()))
    for res in res_name_list:
        new_mol = process_ligand(ligand, res)
        write_sdf(new_mol, pdb_name, res)


 if __name__ == "__main__":
    if len(sys.argv) == 2:
        main(sys.argv[1])
    else:
        print("Usage: {sys.argv[1]} pdb_id", file=sys.stderr)
	#!/usr/bin/env python

	import sys
	from prody import *
	from rdkit import Chem
	from rdkit.Chem import AllChem
	from io import StringIO
	import pypdb


	def get_pdb_components(pdb_id):
	"""
	Split a protein-ligand pdb into protein and ligand components
	:param pdb_id:
	:return:
	"""
	pdb = parsePDB(pdb_id)
	protein = pdb.select('protein')
	ligand = pdb.select('not protein and not water')
	return protein, ligand


	def process_ligand(ligand, res_name):
	"""
	Add bond orders to a pdb ligand
	1. Select the ligand component with name "res_name"
	2. Get the corresponding SMILES from pypdb
	3. Create a template molecule from the SMILES in step 2
	4. Write the PDB file to a stream
	5. Read the stream into an RDKit molecule
	6. Assign the bond orders from the template from step 3
	:param ligand: ligand as generated by prody
	:param res_name: residue name of ligand to extract
	:return: molecule with bond orders assigned
	"""
	output = StringIO()
	sub_mol = ligand.select(f"resname {res_name}")
	chem_desc = pypdb.describe_chemical(f"{res_name}")
	sub_smiles = chem_desc["describeHet"]["ligandInfo"]["ligand"]["smiles"]
	template = AllChem.MolFromSmiles(sub_smiles)
	writePDBStream(output, sub_mol)
	pdb_string = output.getvalue()
	rd_mol = AllChem.MolFromPDBBlock(pdb_string)
	new_mol = AllChem.AssignBondOrdersFromTemplate(template, rd_mol)
	return new_mol


	def write_pdb(protein, pdb_name):
	"""
	Write a prody protein to a pdb file
	:param protein: protein object from prody
	:param pdb_name: base name for the pdb file
	:return: None
	"""
	output_pdb_name = f"{pdb_name}_protein.pdb"
	writePDB(f"{output_pdb_name}", protein)
	print(f"wrote {output_pdb_name}")


	def write_sdf(new_mol, pdb_name, res_name):
	"""
	Write an RDKit molecule to an SD file
	:param new_mol:
	:param pdb_name:
	:param res_name:
	:return:
	"""
	outfile_name = f"{pdb_name}_{res_name}_ligand.sdf"
	writer = Chem.SDWriter(f"{outfile_name}")
	writer.write(new_mol)
	print(f"wrote {outfile_name}")


	def main(pdb_name):
	"""
	Read Ligand Expo data, split pdb into protein and ligands,
	write protein pdb, write ligand sdf files
	:param pdb_name: id from the pdb, doesn't need to have an extension
	:return:
	"""
	protein, ligand = get_pdb_components(pdb_name)
	write_pdb(protein, pdb_name)

	res_name_list = list(set(ligand.getResnames()))
	for res in res_name_list:
	new_mol = process_ligand(ligand, res)
	write_sdf(new_mol, pdb_name, res)


	if __name__ == "__main__":
	if len(sys.argv) == 2:
	main(sys.argv[1])
	else:
	print("Usage: {sys.argv[1]} pdb_id", file=sys.stderr)