Created
October 21, 2018 18:25
-
-
Save ghutchis/4721ba55b695afd1df84423e2b495719 to your computer and use it in GitHub Desktop.
Normal Mode Conformer Generation using cclib and OpenBabel Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
element_symbols = [ | |
"Xx", "H", "He", "Li", "Be", "B", "C", "N", "O", "F", | |
"Ne", "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", "K", | |
"Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", | |
"Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", | |
"Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", | |
"Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", | |
"Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", | |
"Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", | |
"Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", | |
"Th", "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", | |
"Fm", "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt", | |
"Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og" ] | |
element_masses = [ | |
# from IUPAC http://www.chem.qmul.ac.uk/iupac/AtWt/ | |
# (2015 set, updated from 2013) | |
0, 1.00784, 4.0026, | |
6.938, 9.01218, 10.806, 12.011, 14.006, 15.9994, 18.9984, 20.1797, | |
22.9898, 24.305, 26.9815, | |
28.0855, 30.9738, 32.065, 35.453, 39.948, 39.0983, 40.078, | |
44.9559, 47.867, 50.9415, 51.9961, 54.938, 55.845, 58.9332, | |
58.6934, 63.546, 65.38, 69.723, 72.64, 74.9216, 78.971, | |
79.904, 83.798, 85.4678, 87.62, 88.9058, 91.224, 92.9064, | |
95.95, 97, 101.07, 102.9055, 106.42, 107.8682, 112.414, | |
114.818, 118.71, 121.76, 127.6, 126.9045, 131.293, 132.9055, | |
137.327, 138.9055, 140.116, 140.9077, 144.242, 145, 150.36, | |
151.964, 157.25, 158.9253, 162.5, 164.9303, 167.259, 168.9342, | |
173.045, 174.9668, 178.49, 180.9479, 183.84, 186.207, 190.23, | |
192.217, 195.084, 196.9666, 200.592, 204.38, 207.2, 208.9804, | |
209, 210, 222, 223, 226, 227, 232.0377, | |
231.0358, 238.0289, 237, 244, 243, 247, 247, | |
251, 252, 257, 258, 259, 262, 267, | |
270, 269, 270, 270, 278, 281, 281, | |
285, 286, 289, 289, 293, 293, 294 ] | |
element_radii = [ # covalent radii | |
# From Pyykko doi: 10.1002/chem.200800987 | |
# Dummy, 1st row | |
0.18, 0.32, 0.46, | |
# 2nd row | |
1.33, 1.02, 0.85, 0.75, 0.71, 0.63, 0.64, 0.67, | |
# 3rd row | |
1.55, 1.39, 1.26, 1.16, 1.11, 1.03, 0.99, 0.96, | |
# 4th row K, Ca | |
1.96, 1.71, | |
# 1st row TM (Sc.. Zn) | |
1.48, 1.36, 1.34, 1.22, 1.19, 1.16, 1.11, 1.10, 1.12, 1.18, | |
# 4th row p-block (Ga..Kr) | |
1.24, 1.21, 1.21, 1.16, 1.14, 1.17, | |
# 5th row Rb, Sr | |
2.10, 1.85, | |
# 2nd row TM (Y..Cd) | |
1.63, 1.54, 1.47, 1.38, 1.28, 1.25, 1.25, 1.20, 1.28, 1.36, | |
# 5th row p-block (In..Xe) | |
1.42, 1.40, 1.40, 1.36, 1.33, 1.31, | |
# 6th row Cs, Ba | |
2.32, 1.96, | |
# Lanthanides La..Gd | |
1.80, 1.63, 1.76, 1.74, 1.73, 1.72, 1.68, | |
# Lanthanides Tb..Yb | |
1.69, 1.68, 1.67, 1.66, 1.65, 1.64, 1.70, | |
# 3rd row TM (Lu..Hg) | |
1.62, 1.52, 1.46, 1.37, 1.31, 1.29, 1.22, 1.23, 1.24, 1.33, | |
# 6th row p-block (Tl..Rn) | |
1.44, 1.44, 1.51, 1.45, 1.47, 1.42, | |
# 7th row Fr, Ra | |
2.23, 2.01, | |
# Actinides (Ac.. Am) | |
1.86, 1.75, 1.69, 1.70, 1.71, 1.72, 1.66, | |
# Actinides (Cm..No) | |
1.66, 1.68, 1.68, 1.65, 1.67, 1.73, 1.76, | |
# Trans-actinides | |
1.61, 1.57, 1.49, 1.43, 1.41, 1.34, 1.29, 1.28, 1.21, 1.22, | |
1.36, 1.43, 1.62, 1.75, 1.65, 1.57 | |
] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import print_function | |
import sys, os | |
import logging | |
import numpy as np | |
from cclib.io import ccopen | |
from nms import nmsgenerator | |
from data import element_masses, element_symbols | |
def nms_ts(n): | |
"""Normal Mode Temperature and Samples | |
Args: | |
n (int): The number of non-hydrogen atoms. | |
Returns: | |
temperature (float): thermal energy to populate normal normal_modes | |
samples (int): number of samples to consider (ignored) | |
""" | |
stable = {1: (2000.0, 500), | |
2: (1500.0, 450), | |
3: (1000.0, 425), | |
4: ( 600.0, 400), | |
5: ( 600.0, 200), | |
6: ( 600.0, 30), | |
7: ( 600.0, 20), | |
8: ( 450.0, 5), | |
9: ( 450.0, 3)} | |
if n in stable: return stable[n] | |
else: return (400, 2) | |
# read through a bunch of files on the command-line | |
for arg in sys.argv[1:]: | |
# read the file with cclib | |
file = ccopen(arg) | |
file.logger.setLevel(logging.ERROR) | |
molecule = file.parse() | |
# the atom labels (here as atomic numbers) | |
labels = molecule.atomnos | |
try: | |
# not all cclib parsers will populate atommasses | |
# but this is more accurate, since it would include isotopes | |
mass = np.asarray(molecule.atommasses).repeat(3) | |
except AttributeError: | |
mass = np.asarray([element_masses[i] for i in labels]).repeat(3) | |
# get the last geometry from a geometry optimization | |
# should end up as an ndarray (N, 3) | |
xyz = np.asarray(molecule.atomcoords[-1]).reshape(-1,3) | |
# vibrational frequencies (remove any zeros, e.g., translations, rotations) | |
freq = np.asarray(molecule.vibfreqs) | |
freq = freq[freq !=0.0] # should now be an array of size (3N-6,) | |
# get flattened normal modes and reshape to (3N, 3N) | |
# ensure they're in (CartesianCoordinate, ModeNumber) | |
nmo = np.asarray(molecule.vibdisps).reshape(-1, xyz.shape[0]*3) # [3N, 3N] | |
# orca filters out translations and rotations | |
# they appear as modes with all-zero components | |
# so please remove them | |
nmo = nmo[np.array([not (i == 0.0).all() for i in nmo])] | |
# compute mass weighted force constants for each of 3N-6 normal modes | |
# shapes: [3N-6] * ([3N-6, 3N] * [3N,]).sum(axis=1) = [3N-6,] | |
fcc = freq * freq * (nmo * nmo * mass).sum(axis=1) / 16.9744 / 10e4 | |
# get the temperature and number of samples for sampling | |
t, s = nms_ts(np.count_nonzero(labels != 1)) | |
g = nmsgenerator(xyz, nmo, fcc, labels, t, 0) | |
# generate 100 displaced geometries into new XYZ files | |
filename, extension = os.path.splitext(arg) | |
for i in range(100): | |
energy, rxyz = g.get_random_structure(100) | |
# print an xyz file | |
xyzstr = '{}\n{}\n'.format(len(rxyz), energy) | |
for l, x in zip(labels, rxyz): | |
xyzstr += '{} {} {} {}\n'.format(element_symbols[l], *x) | |
new_name = '{}.xyz'.format(i) | |
with open(new_name, 'w') as f: | |
f.write(xyzstr) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pybel | |
import openbabel as ob | |
from data import element_radii | |
mDynetoMet = 1.0e-5 * 1.0e-3 * 1.0e10 | |
# boltzmann constant (in ) | |
Kb = 1.38064852e-23 | |
# meters to Angstrom | |
MtoA = 1.0e10 | |
class nmsgenerator(): | |
# xyz = initial min structure | |
# nmo = normal mode displacements | |
# fcc = force constants | |
# spc = atomic species list | |
# T = temperature of displacement | |
try: | |
et = ob.OBElementTable | |
except: | |
et = {} # OpenBabel 3 doesn't use this class anymore | |
def __init__(self,xyz,nmo,fcc,spc,T,minfc = 1.0E-3): | |
self.xyz = xyz | |
self.nmo = nmo | |
self.labels = spc | |
self.fcc = np.array([i if i > minfc else minfc for i in fcc]) | |
self.chg = np.array([self.__getCharge__(i) for i in spc]) | |
self.radii = [element_radii[i] for i in self.chg] | |
self.T = T | |
self.Na = xyz.shape[0] | |
self.Nf = nmo.shape[0] | |
self.e0 = self.__get_energy__(xyz) | |
# returns atomic charge (number) | |
def __getCharge__(self,t): | |
if isinstance(t, (int, np.int32)): | |
return t | |
return self.et.GetAtomicNum(t) | |
# Checks for small atomic distances | |
def __check_atomic_distances__(self,rxyz): | |
for i in range(0,self.Na): | |
for j in range(i+1,self.Na): | |
Rij = np.linalg.norm(rxyz[i]-rxyz[j]) | |
if Rij < 0.6 * (self.radii[i] * self.radii[j]): | |
return True | |
return False | |
# get energy | |
def __get_energy__(self, rxyz): | |
xyzstr = '{}\n\n'.format(len(rxyz)) | |
for l, x in zip(self.labels, rxyz): | |
xyzstr += '{} {} {} {}\n'.format(l, *x) | |
mol = pybel.readstring('xyz', xyzstr) | |
ff = pybel._forcefields['uff'] | |
if not ff.Setup(mol.OBMol): | |
return None | |
return ff.Energy() | |
# Generate a structure | |
def __genrandomstruct__(self): | |
rdt = np.random.random(self.Nf+1) | |
rdt[0] = 0.0 | |
norm = np.random.random(1)[0] | |
rdt = norm*np.sort(rdt) | |
rdt[self.Nf] = norm | |
oxyz = self.xyz.copy() | |
for i in range(self.Nf): | |
# convert force constants from mDyne to atomic | |
Ki = mDynetoMet * self.fcc[i] | |
ci = rdt[i+1]-rdt[i] | |
Sn = -1.0 if np.random.binomial(1,0.5,1) else 1.0 | |
Ri = Sn * MtoA * np.sqrt((3.0 * ci * float(self.Na) * Kb * self.T)/Ki) | |
oxyz = oxyz + (Ri * self.nmo[i]).reshape(-1, 3) | |
return oxyz | |
# Call this to return a random structure | |
def get_random_structure(self, emax=100): | |
gs = True | |
it = 0 | |
while gs: | |
rxyz = self.__genrandomstruct__() | |
#gs = self.__check_atomic_distances__(rxyz) | |
e1 = self.__get_energy__(rxyz) | |
if not e1: | |
continue | |
gs = e1 - self.e0 - emax > 0 | |
it += 1 | |
#if it % 100 == 0: | |
# print "Failed after ", it | |
return e1, rxyz |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment