Source code for dpdata.plugins.n2p2

import numpy as np

from dpdata.format import Format

from ..unit import EnergyConversion, ForceConversion, LengthConversion

# Unit-conversion factors between the units this plugin assumes for n2p2 files
# (bohr, hartree, hartree/bohr) and the angstrom / eV units of the system data.
# Quantities read from a file are multiplied by these factors; quantities
# written to a file are divided by them.
# NOTE(review): presumably .value() returns the multiplicative factor from the
# first unit to the second -- confirm against the ..unit module.
length_convert = LengthConversion("bohr", "angstrom").value()
energy_convert = EnergyConversion("hartree", "eV").value()
force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value()


def match_indices(atype1, atype2):
    """Pair every entry of ``atype1`` with a distinct, equal entry of ``atype2``.

    Entries of ``atype1`` are processed in order; each one claims the lowest
    index of ``atype2`` that holds an equal value and has not been claimed by
    an earlier entry. Entries without any remaining match contribute nothing
    to the result, so the returned list may be shorter than ``atype1``.

    Parameters
    ----------
    atype1 : sequence
        reference values, whose order defines the order of the result
    atype2 : sequence
        values to search; converted to a numpy array before comparison

    Returns
    -------
    list
        indices into ``atype2``, one per matched entry of ``atype1``
    """
    candidates = np.array(atype2)
    taken = set()
    order = []
    for wanted in atype1:
        # np.where yields a 1-tuple for a 1-D array; its first item holds the
        # positions of every equal entry in ascending order.
        positions = np.where(candidates == wanted)[0]
        pick = next((pos for pos in positions if pos not in taken), None)
        if pick is None:
            # Every equal entry is already claimed (or none exists).
            continue
        order.append(pick)
        taken.add(pick)
    return order
@Format.register("n2p2")
class N2P2Format(Format):
    """n2p2.

    This class supports the conversion from and to the training data of n2p2 format.
    For more information about the n2p2 format, please refer to https://compphysvienna.github.io/n2p2/topics/cfg_file.html
    """

    def from_labeled_system(self, file_name, **kwargs):
        """Read from n2p2 format.

        Every ``begin`` ... ``end`` section of the file is one frame. Inside a
        section only ``lattice``, ``atom`` and ``energy`` lines are used; any
        other line (and any line outside a section) is ignored. The atoms of
        each frame are reordered to follow the atom-type order of the first
        frame.

        Parameters
        ----------
        file_name : str
            file name, i.e. the first argument
        **kwargs : dict
            keyword arguments that will be passed from the method

        Returns
        -------
        data : dict
            system data, whose keys are defined in LabeledSystem.DTYPES

        Raises
        ------
        AssertionError
            if a frame has inconsistent numbers of coordinates, types and
            forces, or if frames differ in atom count or composition
        ValueError
            if the file contains no frame, or an ``end`` line appears without
            a preceding ``begin``
        """
        cells = []
        coords = []
        forces = []
        energies = []
        natom0 = None
        type_counts0 = None  # element name -> atom count, from the first frame
        atom_types0 = None  # per-atom element names of the first frame
        in_section = False
        cell = coord = atype = force = energy = None
        with open(file_name) as file:
            for line in file:
                line = line.strip()  # Remove leading/trailing whitespace
                keyword = line.lower()
                if keyword == "begin":
                    # Start a new frame
                    in_section = True
                    cell = []
                    coord = []
                    atype = []
                    force = []
                    energy = None
                elif keyword == "end":
                    if not in_section:
                        raise ValueError(
                            f"'end' without a matching 'begin' in {file_name}"
                        )
                    assert (
                        len(coord) == len(atype) == len(force)
                    ), "Number of atoms, atom types, and forces must match."

                    # Check if the number of atoms is consistent across all frames
                    natom = len(coord)
                    if natom0 is None:
                        natom0 = natom
                    else:
                        assert (
                            natom == natom0
                        ), "The number of atoms in all frames must be the same."

                    # Check if the number of atoms of each type is consistent
                    # across all frames. Counts are keyed by element name so the
                    # check does not depend on the order in which the elements
                    # appear in a frame.
                    atype = np.array(atype)
                    type_counts = {
                        name: int(np.count_nonzero(atype == name))
                        for name in dict.fromkeys(atype)
                    }
                    if type_counts0 is None:
                        type_counts0 = type_counts
                        atom_types0 = atype
                    else:
                        assert (
                            type_counts == type_counts0
                        ), "The number of atoms of each type in all frames must be the same."

                    # Bring this frame's atoms into the order of the first frame
                    atom_order = match_indices(atom_types0, atype)

                    cells.append(np.array(cell, dtype=float))
                    coords.append(np.array(coord, dtype=float)[atom_order])
                    forces.append(np.array(force, dtype=float)[atom_order])
                    energies.append(float(energy))

                    in_section = False  # Reset for the next section
                elif in_section:
                    line_contents = line.split()
                    if not line_contents:
                        # Blank line inside a section: nothing to parse
                        continue
                    tag = line_contents[0]
                    if tag == "lattice":
                        cell.append(line_contents[1:])
                    elif tag == "atom":
                        # Fields: x y z element <2 unused columns> fx fy fz
                        coord.append(line_contents[1:4])
                        atype.append(line_contents[4])
                        force.append(line_contents[7:10])
                    elif tag == "energy":
                        energy = line_contents[1]

        if type_counts0 is None:
            raise ValueError(f"No 'begin' ... 'end' frame found in {file_name}")

        atom_names = list(type_counts0.keys())
        atom_numbs = list(type_counts0.values())
        atom_types = np.zeros(len(atom_types0), dtype=int)
        for type_index, name in enumerate(atom_names):
            atom_types[atom_types0 == name] = type_index

        cells = np.array(cells) * length_convert
        coords = np.array(coords) * length_convert
        forces = np.array(forces) * force_convert
        energies = np.array(energies) * energy_convert

        return {
            "atom_names": atom_names,
            "atom_numbs": atom_numbs,
            "atom_types": atom_types,
            "coords": coords,
            "cells": cells,
            "nopbc": False,
            "orig": np.zeros(3),
            "energies": energies,
            "forces": forces,
        }

    def to_labeled_system(self, data, file_name, **kwargs):
        """Write n2p2 format.

        By default, LabeledSystem.to will fallback to System.to.

        One ``begin`` ... ``end`` section is written per frame, containing
        three ``lattice`` lines, one ``atom`` line per atom, an ``energy``
        line and a ``charge`` line. The two columns between the element name
        and the forces, and the ``charge`` value, are always written as zero.

        Parameters
        ----------
        data : dict
            system data, whose keys are defined in LabeledSystem.DTYPES
        file_name : str
            file name, where the data will be written
        **kwargs : dict
            keyword arguments that will be passed from the method
        """
        buff = []
        nframe = len(data["energies"])
        natom = len(data["atom_types"])
        atom_names = data["atom_names"]
        atype = data["atom_types"]
        for frame in range(nframe):
            # Convert back to the units used in the n2p2 file
            coord = data["coords"][frame] / length_convert
            force = data["forces"][frame] / force_convert
            energy = data["energies"][frame] / energy_convert
            cell = data["cells"][frame] / length_convert
            buff.append("begin")
            for i in range(3):
                buff.append(
                    f"lattice {cell[i][0]:15.6f} {cell[i][1]:15.6f} {cell[i][2]:15.6f}"
                )
            for i in range(natom):
                buff.append(
                    f"atom {coord[i][0]:15.6f} {coord[i][1]:15.6f} {coord[i][2]:15.6f} {atom_names[atype[i]]:>7} {0:15.6f} {0:15.6f} {force[i][0]:15.6e} {force[i][1]:15.6e} {force[i][2]:15.6e}"
                )
            buff.append(f"energy {energy:15.6f}")
            buff.append(f"charge {0:15.6f}")
            buff.append("end")
        with open(file_name, "w") as fp:
            fp.write("\n".join(buff))