Source code for dpdata.gaussian.gjf

# The initial code of this file is based on
# https://github.com/deepmodeling/dpgen/blob/0767dce7cad29367edb2e4a55fd0d8724dbda642/dpgen/generator/lib/gaussian.py#L1-L190
# under LGPL 3.0 license
"""Generate Gaussian input file."""

from __future__ import annotations

import itertools
import re
import uuid
import warnings

import numpy as np

from dpdata.periodic_table import Element


def _crd2frag(symbols: list[str], crds: np.ndarray) -> tuple[int, list[int]]:
    """Detect fragments from coordinates.

    Parameters
    ----------
    symbols : list[str]
        element symbols; virtual elements are not supported
    crds : np.ndarray
        atomic coordinates, shape: (N, 3)

    Returns
    -------
    frag_numb : int
        number of fragments
    frag_index : list[int]
        frament index that each atom belongs to

    Notes
    -----
    In this method, Open Babel is used to detect bond connectivity. The threshold
    is the sum of covalent radii with a slight tolerance (0.45 A). Note that
    this threshold has errors.

    PBC support is removed from this method as Gaussian does not support PBC calculation.

    Raises
    ------
    ImportError
        if Open Babel is not installed
    """
    from scipy.sparse import csr_matrix
    from scipy.sparse.csgraph import connected_components

    try:
        from openbabel import openbabel
    except ImportError:
        import openbabel
    atomnumber = len(symbols)
    # Use openbabel to connect atoms
    mol = openbabel.OBMol()
    mol.BeginModify()
    for idx, (symbol, position) in enumerate(zip(symbols, crds.astype(np.float64))):
        num = Element(symbol).Z
        atom = mol.NewAtom(idx)
        atom.SetAtomicNum(int(num))
        atom.SetVector(*position)
    mol.ConnectTheDots()
    mol.PerceiveBondOrders()
    mol.EndModify()
    bonds = []
    for ii in range(mol.NumBonds()):
        bond = mol.GetBond(ii)
        a = bond.GetBeginAtom().GetId()
        b = bond.GetEndAtom().GetId()
        bo = bond.GetBondOrder()
        bonds.extend([[a, b, bo], [b, a, bo]])
    bonds = np.array(bonds, ndmin=2).reshape((-1, 3))
    graph = csr_matrix(
        (bonds[:, 2], (bonds[:, 0], bonds[:, 1])), shape=(atomnumber, atomnumber)
    )
    frag_numb, frag_index = connected_components(graph, 0)
    return frag_numb, frag_index


[docs] def detect_multiplicity(symbols: np.ndarray) -> int: """Find the minimal multiplicity of the given molecules. Parameters ---------- symbols : np.ndarray element symbols; virtual elements are not supported Returns ------- int spin multiplicity """ # currently only support charge=0 # oxygen -> 3 if np.count_nonzero(symbols == ["O"]) == 2 and symbols.size == 2: return 3 # calculates the total number of electrons, assumes they are paired as much as possible n_total = sum([Element(s).Z for s in symbols]) return n_total % 2 + 1
[docs] def make_gaussian_input( sys_data: dict, keywords: str | list[str], multiplicity: str | int = "auto", charge: int = 0, fragment_guesses: bool = False, basis_set: str | None = None, keywords_high_multiplicity: str | None = None, nproc: int = 1, ) -> str: """Make gaussian input file. Parameters ---------- sys_data : dict system data keywords : str or list[str] Gaussian keywords, e.g. force b3lyp/6-31g**. If a list, run multiple steps multiplicity : str or int, default=auto spin multiplicity state. It can be a number. If auto, multiplicity will be detected automatically, with the following rules: fragment_guesses=True multiplicity will +1 for each radical, and +2 for each oxygen molecule fragment_guesses=False multiplicity will be 1 or 2, but +2 for each oxygen molecule charge : int, default=0 molecule charge. Only used when charge is not provided by the system fragment_guesses : bool, default=False initial guess generated from fragment guesses. If True, multiplicity should be auto basis_set : str, default=None custom basis set keywords_high_multiplicity : str, default=None keywords for points with multiple raicals. multiplicity should be auto. If not set, fallback to normal keywords nproc : int, default=1 Number of CPUs to use Returns ------- str gjf output string """ coordinates = sys_data["coords"][0] atom_names = sys_data["atom_names"] atom_numbs = sys_data["atom_numbs"] atom_types = sys_data["atom_types"] # get atom symbols list symbols = [atom_names[atom_type] for atom_type in atom_types] # assume default charge is zero and default spin multiplicity is 1 if "charge" in sys_data.keys(): charge = sys_data["charge"] use_fragment_guesses = False if isinstance(multiplicity, int): mult_auto = False elif multiplicity == "auto": mult_auto = True else: raise RuntimeError('The keyword "multiplicity" is illegal.') if fragment_guesses: # Initial guess generated from fragment guesses # New feature of Gaussian 16 use_fragment_guesses = True if not mult_auto: warnings.warn("Automatically set multiplicity to auto!") mult_auto = True if mult_auto: frag_numb, frag_index = _crd2frag(symbols, coordinates) if frag_numb == 1: use_fragment_guesses = False mult_frags = [] for i in range(frag_numb): idx = frag_index == i mult_frags.append(detect_multiplicity(np.array(symbols)[idx])) if use_fragment_guesses: multiplicity = sum(mult_frags) - frag_numb + 1 - charge % 2 chargekeywords_frag = "%d %d" % (charge, multiplicity) + "".join( # noqa: UP031 [" %d %d" % (charge, mult_frag) for mult_frag in mult_frags] # noqa: UP031 ) else: multi_frags = np.array(mult_frags) multiplicity = ( 1 + np.count_nonzero(multi_frags == 2) % 2 + np.count_nonzero(multi_frags == 3) * 2 - charge % 2 ) if ( keywords_high_multiplicity is not None and np.count_nonzero(multi_frags == 2) >= 2 ): # at least 2 radicals keywords = keywords_high_multiplicity if isinstance(keywords, str): keywords = [keywords] else: keywords = keywords.copy() buff = [] # keywords, e.g., force b3lyp/6-31g** if use_fragment_guesses: keywords[0] = f"{keywords[0]} guess=fragment={frag_numb}" chkkeywords = [] if len(keywords) > 1: chkkeywords.append(f"%chk={str(uuid.uuid1())}.chk") nprockeywords = f"%nproc={nproc:d}" # use formula as title titlekeywords = "".join( [f"{symbol}{numb}" for symbol, numb in zip(atom_names, atom_numbs)] ) chargekeywords = f"{charge} {multiplicity}" buff = [ *chkkeywords, nprockeywords, f"#{keywords[0]}", "", titlekeywords, "", (chargekeywords_frag if use_fragment_guesses else chargekeywords), ] for ii, (symbol, coordinate) in enumerate(zip(symbols, coordinates)): if use_fragment_guesses: buff.append( "%s(Fragment=%d) %f %f %f" % (symbol, frag_index[ii] + 1, *coordinate) # noqa: UP031 ) else: buff.append("{} {:f} {:f} {:f}".format(symbol, *coordinate)) # noqa: UP031 if not sys_data.get("nopbc", False): # PBC condition cell = sys_data["cells"][0] for ii in range(3): # use TV as atomic symbol, see https://gaussian.com/pbc/ buff.append("TV {:f} {:f} {:f}".format(*cell[ii])) if basis_set is not None: # custom basis set buff.extend(["", basis_set, ""]) for kw in itertools.islice(keywords, 1, None): buff.extend( [ "\n--link1--", *chkkeywords, nprockeywords, f"#{kw}", "", titlekeywords, "", chargekeywords, "", ] ) buff.append("\n") return "\n".join(buff)
[docs] def read_gaussian_input(inp: str): """Read Gaussian input. Parameters ---------- inp : str Gaussian input str Returns ------- dict system data """ flag = 0 coords = [] elements = [] cells = [] for line in inp.split("\n"): if not line.strip(): # empty line flag += 1 elif flag == 0: # keywords if line.startswith("#"): # setting keywords = line.split() elif line.startswith("%"): pass elif flag == 1: # title pass elif flag == 2: # multi and coords s = line.split() if len(s) == 2: pass elif len(s) == 4: if s[0] == "TV": cells.append(list(map(float, s[1:4]))) else: # element elements.append(re.sub("\\(.*?\\)|\\{.*?}|\\[.*?]", "", s[0])) coords.append(list(map(float, s[1:4]))) elif flag == 3: # end break atom_names, atom_types, atom_numbs = np.unique( elements, return_inverse=True, return_counts=True ) if len(cells): nopbc = False else: nopbc = True cells = np.array([np.eye(3)]) * 100 return { "atom_names": list(atom_names), "atom_numbs": list(atom_numbs), "atom_types": atom_types, "cells": np.array(cells).reshape(1, 3, 3), "nopbc": nopbc, "coords": np.array(coords).reshape(1, -1, 3), "orig": np.zeros(3), }