Source code for dpdata.gaussian.gjf

# The initial code of this file is based on
# https://github.com/deepmodeling/dpgen/blob/0767dce7cad29367edb2e4a55fd0d8724dbda642/dpgen/generator/lib/gaussian.py#L1-L190
# under LGPL 3.0 license
"""Generate Gaussian input file."""

import itertools
import re
import uuid
import warnings
from typing import List, Optional, Tuple, Union

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components

try:
    from openbabel import openbabel
except ImportError:
    try:
        import openbabel
    except ImportError:
        openbabel = None
from dpdata.periodic_table import Element


def _crd2frag(symbols: List[str], crds: np.ndarray) -> Tuple[int, List[int]]:
    """Detect fragments from coordinates.

    Parameters
    ----------
    symbols : list[str]
        element symbols; virtual elements are not supported
    crds : np.ndarray
        atomic coordinates, shape: (N, 3)

    Returns
    -------
    frag_numb : int
        number of fragments
    frag_index : list[int]
        frament index that each atom belongs to

    Notes
    -----
    In this method, Open Babel is used to detect bond connectivity. The threshold
    is the sum of covalent radii with a slight tolerance (0.45 A). Note that
    this threshold has errors.

    PBC support is removed from this method as Gaussian does not support PBC calculation.

    Raises
    ------
    ImportError
        if Open Babel is not installed
    """
    if openbabel is None:
        raise ImportError(
            "Open Babel (Python interface) should be installed to detect fragmentation!"
        )
    atomnumber = len(symbols)
    # Use openbabel to connect atoms
    mol = openbabel.OBMol()
    mol.BeginModify()
    for idx, (symbol, position) in enumerate(zip(symbols, crds.astype(np.float64))):
        num = Element(symbol).Z
        atom = mol.NewAtom(idx)
        atom.SetAtomicNum(int(num))
        atom.SetVector(*position)
    mol.ConnectTheDots()
    mol.PerceiveBondOrders()
    mol.EndModify()
    bonds = []
    for ii in range(mol.NumBonds()):
        bond = mol.GetBond(ii)
        a = bond.GetBeginAtom().GetId()
        b = bond.GetEndAtom().GetId()
        bo = bond.GetBondOrder()
        bonds.extend([[a, b, bo], [b, a, bo]])
    bonds = np.array(bonds, ndmin=2).reshape((-1, 3))
    graph = csr_matrix(
        (bonds[:, 2], (bonds[:, 0], bonds[:, 1])), shape=(atomnumber, atomnumber)
    )
    frag_numb, frag_index = connected_components(graph, 0)
    return frag_numb, frag_index



[docs]
def detect_multiplicity(symbols: np.ndarray) -> int:
    """Find the minimal multiplicity of the given molecules.

    Parameters
    ----------
    symbols : np.ndarray
        element symbols; virtual elements are not supported

    Returns
    -------
    int
        spin multiplicity
    """
    # currently only support charge=0
    # oxygen -> 3
    if np.count_nonzero(symbols == ["O"]) == 2 and symbols.size == 2:
        return 3
    # calculates the total number of electrons, assumes they are paired as much as possible
    n_total = sum([Element(s).Z for s in symbols])
    return n_total % 2 + 1




[docs]
def make_gaussian_input(
    sys_data: dict,
    keywords: Union[str, List[str]],
    multiplicity: Union[str, int] = "auto",
    charge: int = 0,
    fragment_guesses: bool = False,
    basis_set: Optional[str] = None,
    keywords_high_multiplicity: Optional[str] = None,
    nproc: int = 1,
) -> str:
    """Make gaussian input file.

    Parameters
    ----------
    sys_data : dict
        system data
    keywords : str or list[str]
        Gaussian keywords, e.g. force b3lyp/6-31g**. If a list,
        run multiple steps
    multiplicity : str or int, default=auto
        spin multiplicity state. It can be a number. If auto,
        multiplicity will be detected automatically, with the
        following rules:
            fragment_guesses=True
                multiplicity will +1 for each radical, and +2
                for each oxygen molecule
            fragment_guesses=False
                multiplicity will be 1 or 2, but +2 for each
                oxygen molecule
    charge : int, default=0
        molecule charge. Only used when charge is not provided
        by the system
    fragment_guesses : bool, default=False
        initial guess generated from fragment guesses. If True,
        multiplicity should be auto
    basis_set : str, default=None
        custom basis set
    keywords_high_multiplicity : str, default=None
        keywords for points with multiple raicals. multiplicity
        should be auto. If not set, fallback to normal keywords
    nproc : int, default=1
        Number of CPUs to use

    Returns
    -------
    str
        gjf output string
    """
    coordinates = sys_data["coords"][0]
    atom_names = sys_data["atom_names"]
    atom_numbs = sys_data["atom_numbs"]
    atom_types = sys_data["atom_types"]
    # get atom symbols list
    symbols = [atom_names[atom_type] for atom_type in atom_types]

    # assume default charge is zero and default spin multiplicity is 1
    if "charge" in sys_data.keys():
        charge = sys_data["charge"]

    use_fragment_guesses = False
    if isinstance(multiplicity, int):
        mult_auto = False
    elif multiplicity == "auto":
        mult_auto = True
    else:
        raise RuntimeError('The keyword "multiplicity" is illegal.')

    if fragment_guesses:
        # Initial guess generated from fragment guesses
        # New feature of Gaussian 16
        use_fragment_guesses = True
        if not mult_auto:
            warnings.warn("Automatically set multiplicity to auto!")
            mult_auto = True

    if mult_auto:
        frag_numb, frag_index = _crd2frag(symbols, coordinates)
        if frag_numb == 1:
            use_fragment_guesses = False
        mult_frags = []
        for i in range(frag_numb):
            idx = frag_index == i
            mult_frags.append(detect_multiplicity(np.array(symbols)[idx]))
        if use_fragment_guesses:
            multiplicity = sum(mult_frags) - frag_numb + 1 - charge % 2
            chargekeywords_frag = "%d %d" % (charge, multiplicity) + "".join(
                [" %d %d" % (charge, mult_frag) for mult_frag in mult_frags]
            )
        else:
            multi_frags = np.array(mult_frags)
            multiplicity = (
                1
                + np.count_nonzero(multi_frags == 2) % 2
                + np.count_nonzero(multi_frags == 3) * 2
                - charge % 2
            )

        if (
            keywords_high_multiplicity is not None
            and np.count_nonzero(multi_frags == 2) >= 2
        ):
            # at least 2 radicals
            keywords = keywords_high_multiplicity

    if isinstance(keywords, str):
        keywords = [keywords]
    else:
        keywords = keywords.copy()

    buff = []
    # keywords, e.g., force b3lyp/6-31g**
    if use_fragment_guesses:
        keywords[0] = f"{keywords[0]} guess=fragment={frag_numb}"

    chkkeywords = []
    if len(keywords) > 1:
        chkkeywords.append(f"%chk={str(uuid.uuid1())}.chk")

    nprockeywords = f"%nproc={nproc:d}"
    # use formula as title
    titlekeywords = "".join(
        [f"{symbol}{numb}" for symbol, numb in zip(atom_names, atom_numbs)]
    )
    chargekeywords = f"{charge} {multiplicity}"

    buff = [
        *chkkeywords,
        nprockeywords,
        f"#{keywords[0]}",
        "",
        titlekeywords,
        "",
        (chargekeywords_frag if use_fragment_guesses else chargekeywords),
    ]

    for ii, (symbol, coordinate) in enumerate(zip(symbols, coordinates)):
        if use_fragment_guesses:
            buff.append(
                "%s(Fragment=%d) %f %f %f" % (symbol, frag_index[ii] + 1, *coordinate)
            )
        else:
            buff.append("{} {:f} {:f} {:f}".format(symbol, *coordinate))
    if not sys_data.get("nopbc", False):
        # PBC condition
        cell = sys_data["cells"][0]
        for ii in range(3):
            # use TV as atomic symbol, see https://gaussian.com/pbc/
            buff.append("TV {:f} {:f} {:f}".format(*cell[ii]))
    if basis_set is not None:
        # custom basis set
        buff.extend(["", basis_set, ""])
    for kw in itertools.islice(keywords, 1, None):
        buff.extend(
            [
                "\n--link1--",
                *chkkeywords,
                nprockeywords,
                f"#{kw}",
                "",
                titlekeywords,
                "",
                chargekeywords,
                "",
            ]
        )
    buff.append("\n")
    return "\n".join(buff)




[docs]
def read_gaussian_input(inp: str):
    """Read Gaussian input.

    Parameters
    ----------
    inp : str
        Gaussian input str

    Returns
    -------
    dict
        system data
    """
    flag = 0
    coords = []
    elements = []
    cells = []
    for line in inp.split("\n"):
        if not line.strip():
            # empty line
            flag += 1
        elif flag == 0:
            # keywords
            if line.startswith("#"):
                # setting
                keywords = line.split()
            elif line.startswith("%"):
                pass
        elif flag == 1:
            # title
            pass
        elif flag == 2:
            # multi and coords
            s = line.split()
            if len(s) == 2:
                pass
            elif len(s) == 4:
                if s[0] == "TV":
                    cells.append(list(map(float, s[1:4])))
                else:
                    # element
                    elements.append(re.sub("\\(.*?\\)|\\{.*?}|\\[.*?]", "", s[0]))
                    coords.append(list(map(float, s[1:4])))
        elif flag == 3:
            # end
            break
    atom_names, atom_types, atom_numbs = np.unique(
        elements, return_inverse=True, return_counts=True
    )
    if len(cells):
        nopbc = False
    else:
        nopbc = True
        cells = np.array([np.eye(3)]) * 100
    return {
        "atom_names": list(atom_names),
        "atom_numbs": list(atom_numbs),
        "atom_types": atom_types,
        "cells": np.array(cells).reshape(1, 3, 3),
        "nopbc": nopbc,
        "coords": np.array(coords).reshape(1, -1, 3),
        "orig": np.zeros(3),
    }