Source code for dpgen.generator.lib.gaussian

#!/usr/bin/python3


import itertools
import uuid
import warnings

import dpdata
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components

try:
    # expect openbabel >= 3.1.0
    from openbabel import openbabel
except ImportError:
    pass
try:
    from ase import Atom, Atoms
    from ase.data import atomic_numbers
except ImportError:
    pass


def _crd2frag(symbols, crds, pbc=False, cell=None, return_bonds=False):
    atomnumber = len(symbols)
    all_atoms = Atoms(symbols=symbols, positions=crds, pbc=pbc, cell=cell)
    # Use openbabel to connect atoms
    mol = openbabel.OBMol()
    mol.BeginModify()
    for idx, (num, position) in enumerate(
        zip(all_atoms.get_atomic_numbers(), all_atoms.positions)
    ):
        atom = mol.NewAtom(idx)
        atom.SetAtomicNum(int(num))
        atom.SetVector(*position)
    # Apply period boundry conditions
    # openbabel#1853, supported in v3.1.0
    if pbc:
        uc = openbabel.OBUnitCell()
        uc.SetData(
            openbabel.vector3(cell[0][0], cell[0][1], cell[0][2]),
            openbabel.vector3(cell[1][0], cell[1][1], cell[1][2]),
            openbabel.vector3(cell[2][0], cell[2][1], cell[2][2]),
        )
        mol.CloneData(uc)
        mol.SetPeriodicMol()
    mol.ConnectTheDots()
    mol.PerceiveBondOrders()
    mol.EndModify()
    bonds = []
    for ii in range(mol.NumBonds()):
        bond = mol.GetBond(ii)
        a = bond.GetBeginAtom().GetId()
        b = bond.GetEndAtom().GetId()
        bo = bond.GetBondOrder()
        bonds.extend([[a, b, bo], [b, a, bo]])
    bonds = np.array(bonds, ndmin=2).reshape((-1, 3))
    graph = csr_matrix(
        (bonds[:, 2], (bonds[:, 0], bonds[:, 1])), shape=(atomnumber, atomnumber)
    )
    frag_numb, frag_index = connected_components(graph, 0)
    if return_bonds:
        return frag_numb, frag_index, graph
    return frag_numb, frag_index


def _crd2mul(symbols, crds):
    atomnumber = len(symbols)
    xyzstring = "".join(
        (
            f"{atomnumber}\nDPGEN\n",
            "\n".join(
                [
                    f"{s:2s} {x:22.15f} {y:22.15f} {z:22.15f}"
                    for s, (x, y, z) in zip(symbols, crds)
                ]
            ),
        )
    )
    conv = openbabel.OBConversion()
    conv.SetInAndOutFormats("xyz", "gjf")
    mol = openbabel.OBMol()
    conv.ReadString(mol, xyzstring)
    gjfstring = conv.WriteString(mol)
    mul = int(gjfstring.split("\n")[5].split()[1])
    return mul


[docs] def detect_multiplicity(symbols): # currently only support charge=0 # oxygen -> 3 if np.count_nonzero(symbols == ["O"]) == 2 and symbols.size == 2: return 3 # calculates the total number of electrons, assumes they are paired as much as possible n_total = sum([atomic_numbers[s] for s in symbols]) return n_total % 2 + 1
[docs] def make_gaussian_input(sys_data, fp_params): coordinates = sys_data["coords"][0] atom_names = sys_data["atom_names"] # atom_numbs = sys_data['atom_numbs'] atom_types = sys_data["atom_types"] # get atom symbols list symbols = [atom_names[atom_type] for atom_type in atom_types] nproc = fp_params["nproc"] if ( "keywords_high_multiplicity" in fp_params and _crd2mul(symbols, coordinates) >= 3 ): # multiplicity >= 3, meaning at least 2 radicals keywords = fp_params["keywords_high_multiplicity"] else: keywords = fp_params["keywords"] if isinstance(keywords, str): keywords = [keywords] else: keywords = keywords.copy() # assume default charge is zero and default spin multiplicity is 1 if "charge" in sys_data.keys(): charge = sys_data["charge"] else: charge = fp_params.get("charge", 0) use_fragment_guesses = False multiplicity = fp_params.get("multiplicity", "auto") if isinstance(multiplicity, int): multiplicity = fp_params["multiplicity"] mult_auto = False elif multiplicity == "auto": mult_auto = True else: raise RuntimeError('The keyword "multiplicity" is illegal.') if fp_params.get("fragment_guesses", False): # Initial guess generated from fragment guesses # New feature of Gaussian 16 use_fragment_guesses = True if not mult_auto: warnings.warn("Automatically set multiplicity to auto!") mult_auto = True if mult_auto: frag_numb, frag_index = _crd2frag(symbols, coordinates) if frag_numb == 1: use_fragment_guesses = False mult_frags = [] for i in range(frag_numb): idx = frag_index == i mult_frags.append(detect_multiplicity(np.array(symbols)[idx])) if use_fragment_guesses: multiplicity = sum(mult_frags) - frag_numb + 1 chargekeywords_frag = "%d %d" % (charge, multiplicity) + "".join( [" %d %d" % (charge, mult_frag) for mult_frag in mult_frags] ) else: multi_frags = np.array(mult_frags) multiplicity = ( 1 + np.count_nonzero(multi_frags == 2) % 2 + np.count_nonzero(multi_frags == 3) * 2 ) buff = [] # keywords, e.g., force b3lyp/6-31g** if use_fragment_guesses: keywords[0] = f"{keywords[0]} guess=fragment={frag_numb}" chkkeywords = [] if len(keywords) > 1: chkkeywords.append(f"%chk={str(uuid.uuid1())}.chk") nprockeywords = f"%nproc={nproc:d}" titlekeywords = "DPGEN" chargekeywords = f"{charge} {multiplicity}" buff = [ *chkkeywords, nprockeywords, f"#{keywords[0]}", "", titlekeywords, "", (chargekeywords_frag if use_fragment_guesses else chargekeywords), ] for ii, (symbol, coordinate) in enumerate(zip(symbols, coordinates)): if use_fragment_guesses: buff.append( "%s(Fragment=%d) %f %f %f" % (symbol, frag_index[ii] + 1, *coordinate) ) else: buff.append("{} {:f} {:f} {:f}".format(symbol, *coordinate)) if "basis_set" in fp_params: # custom basis set buff.extend(["", fp_params["basis_set"], ""]) for kw in itertools.islice(keywords, 1, None): buff.extend( [ "\n--link1--", *chkkeywords, nprockeywords, f"#{kw}", "", titlekeywords, "", chargekeywords, "", ] ) buff.append("\n") return "\n".join(buff)
[docs] def take_cluster(old_conf_name, type_map, idx, jdata): cutoff = jdata["cluster_cutoff"] cutoff_hard = jdata.get("cluster_cutoff_hard", None) sys = dpdata.System(old_conf_name, fmt="lammps/dump", type_map=type_map) atom_names = sys["atom_names"] atom_types = sys["atom_types"] cell = sys["cells"][0] coords = sys["coords"][0] symbols = [atom_names[atom_type] for atom_type in atom_types] # detect fragment frag_numb, frag_index, graph = _crd2frag( symbols, coords, True, cell, return_bonds=True ) # get_distances all_atoms = Atoms(symbols=symbols, positions=coords, pbc=True, cell=cell) all_atoms[idx].tag = 1 distances = all_atoms.get_distances(idx, range(len(all_atoms)), mic=True) distancescutoff = distances < cutoff cutoff_atoms_idx = np.where(distancescutoff)[0] if cutoff_hard is not None: distancescutoff_hard = distances < cutoff_hard cutoff_atoms_idx_hard = np.where(distancescutoff_hard)[0] # make cutoff atoms in molecules taken_atoms_idx = [] added = [] for ii in range(frag_numb): frag_atoms_idx = np.where(frag_index == ii)[0] if cutoff_hard is not None: # drop atoms out of the hard cutoff anyway frag_atoms_idx = np.intersect1d(frag_atoms_idx, cutoff_atoms_idx_hard) if np.any(np.isin(frag_atoms_idx, cutoff_atoms_idx)): if "cluster_minify" in jdata and jdata["cluster_minify"]: # support for organic species take_frag_idx = [] for aa in frag_atoms_idx: if np.any(np.isin(aa, cutoff_atoms_idx)): # atom is in the soft cutoff # pick up anyway take_frag_idx.append(aa) elif np.count_nonzero( np.logical_and(distancescutoff, graph.toarray()[aa] == 1) ): # atom is between the hard cutoff and the soft cutoff # and has a single bond with the atom inside if all_atoms[aa].symbol == "H": # for atom H: just add it take_frag_idx.append(aa) else: # for other atoms (C, O, etc.): replace it with a ghost H atom near_atom_idx = np.nonzero( np.logical_and(distancescutoff, graph.toarray()[aa] > 0) )[0][0] vector = ( all_atoms[aa].position - all_atoms[near_atom_idx].position ) new_position = ( all_atoms[near_atom_idx].position + vector / np.linalg.norm(vector) * 1.09 ) added.append(Atom("H", new_position)) elif np.count_nonzero( np.logical_and(distancescutoff, graph.toarray()[aa] > 1) ): # if that atom has a double bond with the atom inside # just pick up the whole fragment (within the hard cutoff) # TODO: use a more fantastic method take_frag_idx = frag_atoms_idx break else: take_frag_idx = frag_atoms_idx taken_atoms_idx.append(take_frag_idx) all_taken_atoms_idx = np.concatenate(taken_atoms_idx) # wrap cutoff_atoms = sum(added, all_atoms[all_taken_atoms_idx]) cutoff_atoms.wrap( center=coords[idx] / cutoff_atoms.get_cell_lengths_and_angles()[0:3], pbc=True ) coords = cutoff_atoms.get_positions() sys.data["coords"] = np.array([coords]) sys.data["atom_types"] = np.array( list(atom_types[all_taken_atoms_idx]) + [atom_names.index("H")] * len(added) ) sys.data["atom_pref"] = np.array([cutoff_atoms.get_tags()]) for ii, _ in enumerate(atom_names): sys.data["atom_numbs"][ii] = np.count_nonzero(sys.data["atom_types"] == ii) return sys