"""Source code for dpdata.plugins.n2p2."""

from collections import Counter, defaultdict, deque

import numpy as np

from dpdata.format import Format

from ..unit import EnergyConversion, ForceConversion, LengthConversion

# Unit-conversion factors between the units used on the n2p2 side
# (bohr, hartree, hartree/bohr) and angstrom, eV, eV/angstrom.
# The reader in this module multiplies parsed values by these factors and the
# writer divides by them.
# NOTE(review): presumably ``.value()`` returns a plain multiplicative factor;
# confirm against ``dpdata.unit``.
length_convert = LengthConversion("bohr", "angstrom").value()
energy_convert = EnergyConversion("hartree", "eV").value()
force_convert = ForceConversion("hartree/bohr", "eV/angstrom").value()



[docs]
def match_indices(atype1, atype2):
    """Pair every entry of ``atype1`` with a distinct position in ``atype2``.

    Entries are matched by equality. When a value occurs several times, its
    occurrences in ``atype1`` are paired, in order, with its occurrences in
    ``atype2`` in ascending position, so no position of ``atype2`` is used
    twice.

    Parameters
    ----------
    atype1 : sequence
        Reference sequence of hashable labels (for example element symbols).
    atype2 : sequence
        Sequence whose positions are to be matched against ``atype1``.

    Returns
    -------
    list of int
        For each entry of ``atype1`` that could be matched, the position of
        its partner in ``atype2``. Entries of ``atype1`` without an unused
        partner are skipped, so the result may be shorter than ``atype1``.
    """
    # Record, for each distinct label, the positions where it occurs in
    # atype2. A FIFO queue hands them out in ascending order, which replaces
    # the per-element full-array scan with a single pass over each input.
    free_positions = defaultdict(deque)
    for position, label in enumerate(atype2):
        free_positions[label].append(position)

    matched_indices = []
    for label in atype1:
        # ``get`` avoids creating empty queues for labels absent from atype2.
        queue = free_positions.get(label)
        if queue:
            matched_indices.append(queue.popleft())
    return matched_indices




[docs]
@Format.register("n2p2")
class N2P2Format(Format):
    """n2p2.

    This class supports the conversion from and to the training data of n2p2 format.
    For more information about the n2p2 format, please refer to https://compphysvienna.github.io/n2p2/topics/cfg_file.html
    """

    def from_labeled_system(self, file_name, **kwargs):
        """Read from n2p2 format.

        Every ``begin`` ... ``end`` section of the file is read as one frame.
        Atoms of later frames are reordered so that their element sequence
        matches the one of the first frame.

        Parameters
        ----------
        file_name : str
            file name, i.e. the first argument
        **kwargs : dict
            keyword arguments that will be passed from the method

        Returns
        -------
        data : dict
            system data, whose keys are defined in LabeledSystem.DTYPES

        Raises
        ------
        AssertionError
            If the frames do not all contain the same number of atoms of
            each type.
        ValueError
            If an ``end`` appears without a preceding ``begin``, or if the
            file contains no complete frame.
        """
        cells = []
        coords = []
        forces = []
        energies = []
        # Reference data taken from the first frame; every later frame is
        # validated against it and reordered to its atom sequence.
        natom0 = None
        composition0 = None
        atom_names0 = None
        atom_types0 = None
        # Must be defined before the loop: lines preceding the first "begin"
        # (blank lines, stray text) are simply skipped.
        in_frame = False
        with open(file_name) as file:
            for line in file:
                line = line.strip()  # Remove leading/trailing whitespace
                keyword = line.lower()
                if keyword == "begin":
                    # Start a new frame with empty accumulators
                    in_frame = True
                    cell = []
                    coord = []
                    atype = []
                    force = []
                    energy = None
                elif keyword == "end":
                    if not in_frame:
                        raise ValueError("Found 'end' without a matching 'begin'.")
                    assert (
                        len(coord) == len(atype) == len(force)
                    ), "Number of atoms, atom types, and forces must match."

                    natom = len(coord)
                    # Compare per-element counts by element name, not by
                    # position of first appearance, so frames listing the
                    # same atoms in a different order are accepted and
                    # frames with a different composition are rejected.
                    composition = Counter(atype)
                    if natom0 is None:
                        natom0 = natom
                        composition0 = composition
                        # Distinct element names in order of first appearance
                        atom_names0 = list(dict.fromkeys(atype))
                        atom_types0 = np.array(atype)
                    else:
                        assert (
                            natom == natom0
                        ), "The number of atoms in all frames must be the same."
                        assert (
                            composition == composition0
                        ), "The number of atoms of each type in all frames must be the same."
                    # Permutation that brings this frame into the atom order
                    # of the first frame
                    atom_order = match_indices(atom_types0, atype)

                    cells.append(np.array(cell, dtype=float))
                    coords.append(np.array(coord, dtype=float)[atom_order])
                    forces.append(np.array(force, dtype=float)[atom_order])
                    energies.append(float(energy))

                    in_frame = False  # Reset for the next section
                elif in_frame:
                    line_contents = line.split()
                    if not line_contents:
                        continue  # Blank line inside a frame
                    key = line_contents[0]
                    if key == "lattice":
                        cell.append(line_contents[1:])
                    elif key == "atom":
                        # Fields used: 1-3 position, 4 element, 7-9 force
                        coord.append(line_contents[1:4])
                        atype.append(line_contents[4])
                        force.append(line_contents[7:10])
                    elif key == "energy":
                        energy = line_contents[1]

        if atom_types0 is None:
            raise ValueError(f"No complete begin/end frame found in {file_name!r}.")

        # Names, counts and type indices all refer to the first frame, which
        # is also the atom order every stored frame has been permuted to.
        atom_names = atom_names0
        atom_numbs = [composition0[name] for name in atom_names]
        atom_types = np.zeros(len(atom_types0), dtype=int)
        for i, name in enumerate(atom_names):
            atom_types[atom_types0 == name] = i

        cells = np.array(cells) * length_convert
        coords = np.array(coords) * length_convert
        forces = np.array(forces) * force_convert
        energies = np.array(energies) * energy_convert

        return {
            "atom_names": list(atom_names),
            "atom_numbs": list(atom_numbs),
            "atom_types": atom_types,
            "coords": coords,
            "cells": cells,
            "nopbc": False,
            "orig": np.zeros(3),
            "energies": energies,
            "forces": forces,
        }

    def to_labeled_system(self, data, file_name, **kwargs):
        """Write n2p2 format.

        By default, LabeledSystem.to will fallback to System.to.

        One ``begin`` ... ``end`` section is written per frame, containing
        three ``lattice`` lines, one ``atom`` line per atom, an ``energy``
        line and a ``charge`` line. The two columns between the element name
        and the forces on ``atom`` lines, and the ``charge`` value, are
        always written as zero.

        Parameters
        ----------
        data : dict
            system data, whose keys are defined in LabeledSystem.DTYPES
        file_name : str
            file name, where the data will be written
        **kwargs : dict
            keyword arguments that will be passed from the method
        """
        buff = []
        nframe = len(data["energies"])
        natom = len(data["atom_types"])
        atom_names = data["atom_names"]
        # The type indices are shared by all frames
        atype = data["atom_types"]
        for frame in range(nframe):
            # Undo the conversions applied when reading
            coord = data["coords"][frame] / length_convert
            force = data["forces"][frame] / force_convert
            energy = data["energies"][frame] / energy_convert
            cell = data["cells"][frame] / length_convert
            buff.append("begin")
            for i in range(3):
                buff.append(
                    f"lattice {cell[i][0]:15.6f}  {cell[i][1]:15.6f}  {cell[i][2]:15.6f}"
                )
            for i in range(natom):
                buff.append(
                    f"atom {coord[i][0]:15.6f} {coord[i][1]:15.6f} {coord[i][2]:15.6f} {atom_names[atype[i]]:>7} {0:15.6f} {0:15.6f} {force[i][0]:15.6e} {force[i][1]:15.6e} {force[i][2]:15.6e}"
                )
            buff.append(f"energy {energy:15.6f}")
            buff.append(f"charge {0:15.6f}")
            buff.append("end")
        with open(file_name, "w") as fp:
            fp.write("\n".join(buff))