Source code for dpgen.generator.lib.gaussian
#!/usr/bin/python3
import itertools
import uuid
import warnings
import dpdata
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components
try:
# expect openbabel >= 3.1.0
from openbabel import openbabel
except ImportError:
pass
try:
from ase import Atom, Atoms
from ase.data import atomic_numbers
except ImportError:
pass
def _crd2frag(symbols, crds, pbc=False, cell=None, return_bonds=False):
atomnumber = len(symbols)
all_atoms = Atoms(symbols=symbols, positions=crds, pbc=pbc, cell=cell)
# Use openbabel to connect atoms
mol = openbabel.OBMol()
mol.BeginModify()
for idx, (num, position) in enumerate(
zip(all_atoms.get_atomic_numbers(), all_atoms.positions)
):
atom = mol.NewAtom(idx)
atom.SetAtomicNum(int(num))
atom.SetVector(*position)
# Apply period boundry conditions
# openbabel#1853, supported in v3.1.0
if pbc:
uc = openbabel.OBUnitCell()
uc.SetData(
openbabel.vector3(cell[0][0], cell[0][1], cell[0][2]),
openbabel.vector3(cell[1][0], cell[1][1], cell[1][2]),
openbabel.vector3(cell[2][0], cell[2][1], cell[2][2]),
)
mol.CloneData(uc)
mol.SetPeriodicMol()
mol.ConnectTheDots()
mol.PerceiveBondOrders()
mol.EndModify()
bonds = []
for ii in range(mol.NumBonds()):
bond = mol.GetBond(ii)
a = bond.GetBeginAtom().GetId()
b = bond.GetEndAtom().GetId()
bo = bond.GetBondOrder()
bonds.extend([[a, b, bo], [b, a, bo]])
bonds = np.array(bonds, ndmin=2).reshape((-1, 3))
graph = csr_matrix(
(bonds[:, 2], (bonds[:, 0], bonds[:, 1])), shape=(atomnumber, atomnumber)
)
frag_numb, frag_index = connected_components(graph, 0)
if return_bonds:
return frag_numb, frag_index, graph
return frag_numb, frag_index
def _crd2mul(symbols, crds):
atomnumber = len(symbols)
xyzstring = "".join(
(
f"{atomnumber}\nDPGEN\n",
"\n".join(
[
f"{s:2s} {x:22.15f} {y:22.15f} {z:22.15f}"
for s, (x, y, z) in zip(symbols, crds)
]
),
)
)
conv = openbabel.OBConversion()
conv.SetInAndOutFormats("xyz", "gjf")
mol = openbabel.OBMol()
conv.ReadString(mol, xyzstring)
gjfstring = conv.WriteString(mol)
mul = int(gjfstring.split("\n")[5].split()[1])
return mul
[docs]
def detect_multiplicity(symbols):
# currently only support charge=0
# oxygen -> 3
if np.count_nonzero(symbols == ["O"]) == 2 and symbols.size == 2:
return 3
# calculates the total number of electrons, assumes they are paired as much as possible
n_total = sum([atomic_numbers[s] for s in symbols])
return n_total % 2 + 1
[docs]
def make_gaussian_input(sys_data, fp_params):
coordinates = sys_data["coords"][0]
atom_names = sys_data["atom_names"]
# atom_numbs = sys_data['atom_numbs']
atom_types = sys_data["atom_types"]
# get atom symbols list
symbols = [atom_names[atom_type] for atom_type in atom_types]
nproc = fp_params["nproc"]
if (
"keywords_high_multiplicity" in fp_params
and _crd2mul(symbols, coordinates) >= 3
):
# multiplicity >= 3, meaning at least 2 radicals
keywords = fp_params["keywords_high_multiplicity"]
else:
keywords = fp_params["keywords"]
if isinstance(keywords, str):
keywords = [keywords]
else:
keywords = keywords.copy()
# assume default charge is zero and default spin multiplicity is 1
if "charge" in sys_data.keys():
charge = sys_data["charge"]
else:
charge = fp_params.get("charge", 0)
use_fragment_guesses = False
multiplicity = fp_params.get("multiplicity", "auto")
if isinstance(multiplicity, int):
multiplicity = fp_params["multiplicity"]
mult_auto = False
elif multiplicity == "auto":
mult_auto = True
else:
raise RuntimeError('The keyword "multiplicity" is illegal.')
if fp_params.get("fragment_guesses", False):
# Initial guess generated from fragment guesses
# New feature of Gaussian 16
use_fragment_guesses = True
if not mult_auto:
warnings.warn("Automatically set multiplicity to auto!")
mult_auto = True
if mult_auto:
frag_numb, frag_index = _crd2frag(symbols, coordinates)
if frag_numb == 1:
use_fragment_guesses = False
mult_frags = []
for i in range(frag_numb):
idx = frag_index == i
mult_frags.append(detect_multiplicity(np.array(symbols)[idx]))
if use_fragment_guesses:
multiplicity = sum(mult_frags) - frag_numb + 1
chargekeywords_frag = "%d %d" % (charge, multiplicity) + "".join(
[" %d %d" % (charge, mult_frag) for mult_frag in mult_frags]
)
else:
multi_frags = np.array(mult_frags)
multiplicity = (
1
+ np.count_nonzero(multi_frags == 2) % 2
+ np.count_nonzero(multi_frags == 3) * 2
)
buff = []
# keywords, e.g., force b3lyp/6-31g**
if use_fragment_guesses:
keywords[0] = f"{keywords[0]} guess=fragment={frag_numb}"
chkkeywords = []
if len(keywords) > 1:
chkkeywords.append(f"%chk={str(uuid.uuid1())}.chk")
nprockeywords = f"%nproc={nproc:d}"
titlekeywords = "DPGEN"
chargekeywords = f"{charge} {multiplicity}"
buff = [
*chkkeywords,
nprockeywords,
f"#{keywords[0]}",
"",
titlekeywords,
"",
(chargekeywords_frag if use_fragment_guesses else chargekeywords),
]
for ii, (symbol, coordinate) in enumerate(zip(symbols, coordinates)):
if use_fragment_guesses:
buff.append(
"%s(Fragment=%d) %f %f %f" % (symbol, frag_index[ii] + 1, *coordinate)
)
else:
buff.append("{} {:f} {:f} {:f}".format(symbol, *coordinate))
if "basis_set" in fp_params:
# custom basis set
buff.extend(["", fp_params["basis_set"], ""])
for kw in itertools.islice(keywords, 1, None):
buff.extend(
[
"\n--link1--",
*chkkeywords,
nprockeywords,
f"#{kw}",
"",
titlekeywords,
"",
chargekeywords,
"",
]
)
buff.append("\n")
return "\n".join(buff)
[docs]
def take_cluster(old_conf_name, type_map, idx, jdata):
cutoff = jdata["cluster_cutoff"]
cutoff_hard = jdata.get("cluster_cutoff_hard", None)
sys = dpdata.System(old_conf_name, fmt="lammps/dump", type_map=type_map)
atom_names = sys["atom_names"]
atom_types = sys["atom_types"]
cell = sys["cells"][0]
coords = sys["coords"][0]
symbols = [atom_names[atom_type] for atom_type in atom_types]
# detect fragment
frag_numb, frag_index, graph = _crd2frag(
symbols, coords, True, cell, return_bonds=True
)
# get_distances
all_atoms = Atoms(symbols=symbols, positions=coords, pbc=True, cell=cell)
all_atoms[idx].tag = 1
distances = all_atoms.get_distances(idx, range(len(all_atoms)), mic=True)
distancescutoff = distances < cutoff
cutoff_atoms_idx = np.where(distancescutoff)[0]
if cutoff_hard is not None:
distancescutoff_hard = distances < cutoff_hard
cutoff_atoms_idx_hard = np.where(distancescutoff_hard)[0]
# make cutoff atoms in molecules
taken_atoms_idx = []
added = []
for ii in range(frag_numb):
frag_atoms_idx = np.where(frag_index == ii)[0]
if cutoff_hard is not None:
# drop atoms out of the hard cutoff anyway
frag_atoms_idx = np.intersect1d(frag_atoms_idx, cutoff_atoms_idx_hard)
if np.any(np.isin(frag_atoms_idx, cutoff_atoms_idx)):
if "cluster_minify" in jdata and jdata["cluster_minify"]:
# support for organic species
take_frag_idx = []
for aa in frag_atoms_idx:
if np.any(np.isin(aa, cutoff_atoms_idx)):
# atom is in the soft cutoff
# pick up anyway
take_frag_idx.append(aa)
elif np.count_nonzero(
np.logical_and(distancescutoff, graph.toarray()[aa] == 1)
):
# atom is between the hard cutoff and the soft cutoff
# and has a single bond with the atom inside
if all_atoms[aa].symbol == "H":
# for atom H: just add it
take_frag_idx.append(aa)
else:
# for other atoms (C, O, etc.): replace it with a ghost H atom
near_atom_idx = np.nonzero(
np.logical_and(distancescutoff, graph.toarray()[aa] > 0)
)[0][0]
vector = (
all_atoms[aa].position
- all_atoms[near_atom_idx].position
)
new_position = (
all_atoms[near_atom_idx].position
+ vector / np.linalg.norm(vector) * 1.09
)
added.append(Atom("H", new_position))
elif np.count_nonzero(
np.logical_and(distancescutoff, graph.toarray()[aa] > 1)
):
# if that atom has a double bond with the atom inside
# just pick up the whole fragment (within the hard cutoff)
# TODO: use a more fantastic method
take_frag_idx = frag_atoms_idx
break
else:
take_frag_idx = frag_atoms_idx
taken_atoms_idx.append(take_frag_idx)
all_taken_atoms_idx = np.concatenate(taken_atoms_idx)
# wrap
cutoff_atoms = sum(added, all_atoms[all_taken_atoms_idx])
cutoff_atoms.wrap(
center=coords[idx] / cutoff_atoms.get_cell_lengths_and_angles()[0:3], pbc=True
)
coords = cutoff_atoms.get_positions()
sys.data["coords"] = np.array([coords])
sys.data["atom_types"] = np.array(
list(atom_types[all_taken_atoms_idx]) + [atom_names.index("H")] * len(added)
)
sys.data["atom_pref"] = np.array([cutoff_atoms.get_tags()])
for ii, _ in enumerate(atom_names):
sys.data["atom_numbs"][ii] = np.count_nonzero(sys.data["atom_types"] == ii)
return sys