Source code for dpdata.formats.gaussian.fchk

from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

from dpdata.utils import open_file

if TYPE_CHECKING:
    from dpdata.utils import FileType

from ...periodic_table import ELEMENTS
from ...unit import (
    EnergyConversion,
    ForceConversion,
    HessianConversion,
    LengthConversion,
)

length_convert = LengthConversion("bohr", "angstrom").value()
energy_convert = EnergyConversion("hartree", "eV").value()
force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value()
hessian_convert = HessianConversion("hartree/bohr^2", "eV/angstrom^2").value()


[docs] def create_full_hessian(hessian_raw: list | np.ndarray, natoms: int) -> np.ndarray: """ Reconstructs the full, symmetric Hessian matrix from a 1D array containing its lower triangular elements. Args: hessian_raw (list | np.ndarray): A 1D list or NumPy array containing the lower triangular elements (including the diagonal) of the Hessian matrix. natoms (int): The number of atoms in the system. Returns ------- np.ndarray: A full, symmetric (3*natoms, 3*natoms) Hessian matrix. Raises ------ ValueError: If the number of elements in `hessian_raw` does not match the expected number for the lower triangle of a (3*natoms, 3*natoms) matrix. """ # Convert input to a NumPy array in case it's a list hessian_block = np.array(hessian_raw) # Calculate the dimension of the final matrix dim = 3 * natoms # Validate that the input data has the correct length # A lower triangle of an n x n matrix has n*(n+1)/2 elements expected_length = dim * (dim + 1) // 2 if hessian_block.size != expected_length: raise ValueError( f"Input length {hessian_block.size} != expected {expected_length}" ) # Create a zero matrix, then fill the lower triangle hessian_full = np.zeros((dim, dim), dtype=hessian_block.dtype) lower_triangle_indices = np.tril_indices(dim) hessian_full[lower_triangle_indices] = hessian_block # This is done by copying the lower triangle to the upper triangle # M_full = M_lower + M_lower.T - diag(M_lower) hessian_full = hessian_full + hessian_full.T - np.diag(np.diag(hessian_full)) return hessian_full
[docs] def to_system_data(file_name: FileType, has_forces=True, has_hessian=True): """Read Gaussian fchk file. Parameters ---------- file_name : str file name has_forces : bool, default True whether to read force Note: Cartesian Gradient in fchk file is converted to forces by taking negative sign has_hessian : bool, default True whether to read hessian Returns ------- data : dict system data, including hessian if has_hessian is True """ data = {} natoms = 0 atom_numbers = [] coords_t = [] energy_t = [] forces_t = [] hessian_t = [] # Read fchk file with open_file(file_name) as fp: for line in fp: if isinstance(line, bytes): line = line.decode(errors="ignore") if "Number of atoms" in line: natoms = int(line.split()[-1]) elif "Atomic numbers" in line and "I" in line: n = int(line.split()[-1]) atom_numbers = [] while len(atom_numbers) < n: next_line = next(fp) if isinstance(next_line, bytes): next_line = next_line.decode(errors="ignore") atom_numbers += [int(x) for x in next_line.split()] elif "Current cartesian coordinates" in line and "R" in line: n = int(line.split()[-1]) coords_raw = [] while len(coords_raw) < n: next_line = next(fp) if isinstance(next_line, bytes): next_line = next_line.decode(errors="ignore") coords_raw += [float(x) for x in next_line.split()] coords = np.array(coords_raw).reshape(-1, 3) * length_convert coords_t.append(coords) elif "Total Energy" in line: energy = float(line.split()[-1]) * energy_convert energy_t.append(energy) elif "Cartesian Gradient" in line: n = int(line.split()[-1]) forces_raw = [] while len(forces_raw) < n: next_line = next(fp) if isinstance(next_line, bytes): next_line = next_line.decode(errors="ignore") forces_raw += [float(x) for x in next_line.split()] # Cartesian Gradient is the negative of forces: F = -∇E forces = -np.array(forces_raw).reshape(-1, 3) * force_convert forces_t.append(forces) elif "Cartesian Force Constants" in line and "R" in line: n = int(line.split()[-1]) hessian_raw = [] while len(hessian_raw) < n: next_line = next(fp) if isinstance(next_line, bytes): next_line = next_line.decode(errors="ignore") hessian_raw += [float(x) for x in next_line.split()] hessian_full = ( create_full_hessian(hessian_raw, natoms) * hessian_convert ) # store as (natoms, 3, natoms, 3) to align with registered shape hessian_t.append(hessian_full.reshape(natoms, 3, natoms, 3)) # Assert key data assert coords_t, "cannot find coords" assert energy_t, "cannot find energy" if has_forces: assert forces_t, "cannot find forces" if has_hessian: assert hessian_t, "cannot find hessian" # Assemble data atom_symbols = [ELEMENTS[z - 1] for z in atom_numbers] atom_names, atom_types, atom_numbs = np.unique( atom_symbols, return_inverse=True, return_counts=True ) data["atom_names"] = list(atom_names) data["atom_numbs"] = list(atom_numbs) data["atom_types"] = atom_types data["coords"] = np.array(coords_t).reshape(-1, natoms, 3) data["orig"] = np.zeros(3) data["cells"] = np.array([np.eye(3) * 100]) data["nopbc"] = True if energy_t: data["energies"] = np.array(energy_t) if has_forces and forces_t: data["forces"] = np.array(forces_t) if has_hessian and hessian_t: data["hessian"] = np.array(hessian_t) return data