Source code for dpdata.amber.md

import os
import re

import numpy as np
from scipy.io import netcdf_file

from dpdata.amber.mask import pick_by_amber_mask
from dpdata.unit import EnergyConversion

from ..periodic_table import ELEMENTS

kcalmol2eV = EnergyConversion("kcal_mol", "eV").value()
symbols = ["X"] + ELEMENTS

energy_convert = kcalmol2eV
force_convert = energy_convert


[docs] def read_amber_traj( parm7_file, nc_file, mdfrc_file=None, mden_file=None, mdout_file=None, use_element_symbols=None, labeled=True, ): """The amber trajectory includes: * nc, NetCDF format, stores coordinates * mdfrc, NetCDF format, stores forces * mden (optional), text format, stores energies * mdout (optional), text format, may store energies if there is no mden_file * parm7, text format, stores types. Parameters ---------- parm7_file, nc_file, mdfrc_file, mden_file, mdout_file: filenames use_element_symbols : None or list or str If use_element_symbols is a list of atom indexes, these atoms will use element symbols instead of amber types. For example, a ligand will use C, H, O, N, and so on instead of h1, hc, o, os, and so on. IF use_element_symbols is str, it will be considered as Amber mask. labeled : bool Whether to return labeled data """ flag_atom_type = False flag_atom_numb = False amber_types = [] atomic_number = [] with open(parm7_file) as f: for line in f: if line.startswith("%FLAG"): flag_atom_type = line.startswith("%FLAG AMBER_ATOM_TYPE") flag_atom_numb = (use_element_symbols is not None) and line.startswith( "%FLAG ATOMIC_NUMBER" ) elif flag_atom_type or flag_atom_numb: if line.startswith("%FORMAT"): fmt = re.findall(r"\d+", line) fmt0 = int(fmt[0]) fmt1 = int(fmt[1]) else: for ii in range(fmt0): start_index = ii * fmt1 end_index = (ii + 1) * fmt1 if end_index >= len(line): continue content = line[start_index:end_index].strip() if flag_atom_type: amber_types.append(content) elif flag_atom_numb: atomic_number.append(int(content)) if use_element_symbols is not None: if isinstance(use_element_symbols, str): use_element_symbols = pick_by_amber_mask(parm7_file, use_element_symbols) for ii in use_element_symbols: amber_types[ii] = symbols[atomic_number[ii]] with netcdf_file(nc_file, "r") as f: coords = np.array(f.variables["coordinates"][:]) cell_lengths = np.array(f.variables["cell_lengths"][:]) cell_angles = np.array(f.variables["cell_angles"][:]) if np.all(cell_angles > 89.99) and np.all(cell_angles < 90.01): # only support 90 # TODO: support other angles shape = cell_lengths.shape cells = np.zeros((shape[0], 3, 3)) for ii in range(3): cells[:, ii, ii] = cell_lengths[:, ii] else: raise RuntimeError("Unsupported cells") if labeled: with netcdf_file(mdfrc_file, "r") as f: forces = np.array(f.variables["forces"][:]) # load energy from mden_file or mdout_file energies = [] if mden_file is not None and os.path.isfile(mden_file): with open(mden_file) as f: for line in f: if line.startswith("L6"): s = line.split() if s[2] != "E_pot": energies.append(float(s[2])) elif mdout_file is not None and os.path.isfile(mdout_file): with open(mdout_file) as f: for line in f: if "EPtot" in line: s = line.split() energies.append(float(s[-1])) else: raise RuntimeError("Please provide one of mden_file and mdout_file") atom_names, atom_types, atom_numbs = np.unique( amber_types, return_inverse=True, return_counts=True ) data = {} data["atom_names"] = list(atom_names) data["atom_numbs"] = list(atom_numbs) data["atom_types"] = atom_types if labeled: data["forces"] = forces * force_convert data["energies"] = np.array(energies) * energy_convert data["coords"] = coords data["cells"] = cells data["orig"] = np.array([0, 0, 0]) return data