Source code for dpdata.gaussian.gjf

# The initial code of this file is based on
# https://github.com/deepmodeling/dpgen/blob/0767dce7cad29367edb2e4a55fd0d8724dbda642/dpgen/generator/lib/gaussian.py#L1-L190
# under LGPL 3.0 license
"""Generate Gaussian input file."""

import itertools
import re
import uuid
import warnings
from typing import List, Optional, Tuple, Union

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components

try:
    from openbabel import openbabel
except ImportError:
    try:
        import openbabel
    except ImportError:
        openbabel = None
from dpdata.periodic_table import Element


def _crd2frag(symbols: List[str], crds: np.ndarray) -> Tuple[int, List[int]]:
    """Detect fragments from coordinates.

    Parameters
    ----------
    symbols : list[str]
        element symbols; virtual elements are not supported
    crds : np.ndarray
        atomic coordinates, shape: (N, 3)

    Returns
    -------
    frag_numb : int
        number of fragments
    frag_index : list[int]
        frament index that each atom belongs to

    Notes
    -----
    In this method, Open Babel is used to detect bond connectivity. The threshold
    is the sum of covalent radii with a slight tolerance (0.45 A). Note that
    this threshold has errors.

    PBC support is removed from this method as Gaussian does not support PBC calculation.

    Raises
    ------
    ImportError
        if Open Babel is not installed
    """
    if openbabel is None:
        raise ImportError(
            "Open Babel (Python interface) should be installed to detect fragmentation!"
        )
    atomnumber = len(symbols)
    # Use openbabel to connect atoms
    mol = openbabel.OBMol()
    mol.BeginModify()
    for idx, (symbol, position) in enumerate(zip(symbols, crds.astype(np.float64))):
        num = Element(symbol).Z
        atom = mol.NewAtom(idx)
        atom.SetAtomicNum(int(num))
        atom.SetVector(*position)
    mol.ConnectTheDots()
    mol.PerceiveBondOrders()
    mol.EndModify()
    bonds = []
    for ii in range(mol.NumBonds()):
        bond = mol.GetBond(ii)
        a = bond.GetBeginAtom().GetId()
        b = bond.GetEndAtom().GetId()
        bo = bond.GetBondOrder()
        bonds.extend([[a, b, bo], [b, a, bo]])
    bonds = np.array(bonds, ndmin=2).reshape((-1, 3))
    graph = csr_matrix(
        (bonds[:, 2], (bonds[:, 0], bonds[:, 1])), shape=(atomnumber, atomnumber)
    )
    frag_numb, frag_index = connected_components(graph, 0)
    return frag_numb, frag_index


[docs] def detect_multiplicity(symbols: np.ndarray) -> int: """Find the minimal multiplicity of the given molecules. Parameters ---------- symbols : np.ndarray element symbols; virtual elements are not supported Returns ------- int spin multiplicity """ # currently only support charge=0 # oxygen -> 3 if np.count_nonzero(symbols == ["O"]) == 2 and symbols.size == 2: return 3 # calculates the total number of electrons, assumes they are paired as much as possible n_total = sum([Element(s).Z for s in symbols]) return n_total % 2 + 1
[docs] def make_gaussian_input( sys_data: dict, keywords: Union[str, List[str]], multiplicity: Union[str, int] = "auto", charge: int = 0, fragment_guesses: bool = False, basis_set: Optional[str] = None, keywords_high_multiplicity: Optional[str] = None, nproc: int = 1, ) -> str: """Make gaussian input file. Parameters ---------- sys_data : dict system data keywords : str or list[str] Gaussian keywords, e.g. force b3lyp/6-31g**. If a list, run multiple steps multiplicity : str or int, default=auto spin multiplicity state. It can be a number. If auto, multiplicity will be detected automatically, with the following rules: fragment_guesses=True multiplicity will +1 for each radical, and +2 for each oxygen molecule fragment_guesses=False multiplicity will be 1 or 2, but +2 for each oxygen molecule charge : int, default=0 molecule charge. Only used when charge is not provided by the system fragment_guesses : bool, default=False initial guess generated from fragment guesses. If True, multiplicity should be auto basis_set : str, default=None custom basis set keywords_high_multiplicity : str, default=None keywords for points with multiple raicals. multiplicity should be auto. If not set, fallback to normal keywords nproc : int, default=1 Number of CPUs to use Returns ------- str gjf output string """ coordinates = sys_data["coords"][0] atom_names = sys_data["atom_names"] atom_numbs = sys_data["atom_numbs"] atom_types = sys_data["atom_types"] # get atom symbols list symbols = [atom_names[atom_type] for atom_type in atom_types] # assume default charge is zero and default spin multiplicity is 1 if "charge" in sys_data.keys(): charge = sys_data["charge"] use_fragment_guesses = False if isinstance(multiplicity, int): mult_auto = False elif multiplicity == "auto": mult_auto = True else: raise RuntimeError('The keyword "multiplicity" is illegal.') if fragment_guesses: # Initial guess generated from fragment guesses # New feature of Gaussian 16 use_fragment_guesses = True if not mult_auto: warnings.warn("Automatically set multiplicity to auto!") mult_auto = True if mult_auto: frag_numb, frag_index = _crd2frag(symbols, coordinates) if frag_numb == 1: use_fragment_guesses = False mult_frags = [] for i in range(frag_numb): idx = frag_index == i mult_frags.append(detect_multiplicity(np.array(symbols)[idx])) if use_fragment_guesses: multiplicity = sum(mult_frags) - frag_numb + 1 - charge % 2 chargekeywords_frag = "%d %d" % (charge, multiplicity) + "".join( [" %d %d" % (charge, mult_frag) for mult_frag in mult_frags] ) else: multi_frags = np.array(mult_frags) multiplicity = ( 1 + np.count_nonzero(multi_frags == 2) % 2 + np.count_nonzero(multi_frags == 3) * 2 - charge % 2 ) if ( keywords_high_multiplicity is not None and np.count_nonzero(multi_frags == 2) >= 2 ): # at least 2 radicals keywords = keywords_high_multiplicity if isinstance(keywords, str): keywords = [keywords] else: keywords = keywords.copy() buff = [] # keywords, e.g., force b3lyp/6-31g** if use_fragment_guesses: keywords[0] = f"{keywords[0]} guess=fragment={frag_numb}" chkkeywords = [] if len(keywords) > 1: chkkeywords.append(f"%chk={str(uuid.uuid1())}.chk") nprockeywords = f"%nproc={nproc:d}" # use formula as title titlekeywords = "".join( [f"{symbol}{numb}" for symbol, numb in zip(atom_names, atom_numbs)] ) chargekeywords = f"{charge} {multiplicity}" buff = [ *chkkeywords, nprockeywords, f"#{keywords[0]}", "", titlekeywords, "", (chargekeywords_frag if use_fragment_guesses else chargekeywords), ] for ii, (symbol, coordinate) in enumerate(zip(symbols, coordinates)): if use_fragment_guesses: buff.append( "%s(Fragment=%d) %f %f %f" % (symbol, frag_index[ii] + 1, *coordinate) ) else: buff.append("{} {:f} {:f} {:f}".format(symbol, *coordinate)) if not sys_data.get("nopbc", False): # PBC condition cell = sys_data["cells"][0] for ii in range(3): # use TV as atomic symbol, see https://gaussian.com/pbc/ buff.append("TV {:f} {:f} {:f}".format(*cell[ii])) if basis_set is not None: # custom basis set buff.extend(["", basis_set, ""]) for kw in itertools.islice(keywords, 1, None): buff.extend( [ "\n--link1--", *chkkeywords, nprockeywords, f"#{kw}", "", titlekeywords, "", chargekeywords, "", ] ) buff.append("\n") return "\n".join(buff)
[docs] def read_gaussian_input(inp: str): """Read Gaussian input. Parameters ---------- inp : str Gaussian input str Returns ------- dict system data """ flag = 0 coords = [] elements = [] cells = [] for line in inp.split("\n"): if not line.strip(): # empty line flag += 1 elif flag == 0: # keywords if line.startswith("#"): # setting keywords = line.split() elif line.startswith("%"): pass elif flag == 1: # title pass elif flag == 2: # multi and coords s = line.split() if len(s) == 2: pass elif len(s) == 4: if s[0] == "TV": cells.append(list(map(float, s[1:4]))) else: # element elements.append(re.sub("\\(.*?\\)|\\{.*?}|\\[.*?]", "", s[0])) coords.append(list(map(float, s[1:4]))) elif flag == 3: # end break atom_names, atom_types, atom_numbs = np.unique( elements, return_inverse=True, return_counts=True ) if len(cells): nopbc = False else: nopbc = True cells = np.array([np.eye(3)]) * 100 return { "atom_names": list(atom_names), "atom_numbs": list(atom_numbs), "atom_types": atom_types, "cells": np.array(cells).reshape(1, 3, 3), "nopbc": nopbc, "coords": np.array(coords).reshape(1, -1, 3), "orig": np.zeros(3), }