Source code for dpdata.abacus.md

import os
import warnings

import numpy as np

from .scf import (
    bohr2ang,
    get_cell,
    get_coords,
    get_geometry_in,
    kbar2evperang3,
)

# Read in geometries from an ABACUS MD trajectory.
# The atomic coordinates are read in from generated files in OUT.XXXX.
# Energies, forces
# IMPORTANT: the program defaultly takes STRU input file as standard cell information,
# therefore the direct and cartesan coordinates read could be different from the ones in
# the output cif files!!!
# It is highly recommanded to use ORTHOGANAL coordinates in STRU file if you wish to get
# same coordinates in both dpdata and output cif files.



[docs]
def get_path_out(fname, inlines):
    # This function is different from the same-name function in scf.py.
    # This function returns OUT.XXXX's base directory.
    path_out = os.path.join(fname, "OUT.ABACUS/")
    for line in inlines:
        if len(line) > 0 and "suffix" in line and "suffix" == line.split()[0]:
            suffix = line.split()[1]
            path_out = os.path.join(fname, "OUT.%s/" % suffix)
            break
    return path_out




[docs]
def get_coord_dump_freq(inlines):
    for line in inlines:
        if len(line) > 0 and "md_dumpfreq" in line and "md_dumpfreq" == line.split()[0]:
            return int(line.split()[1])
    return 1




[docs]
def get_coords_from_dump(dumplines, natoms):
    nlines = len(dumplines)
    total_natoms = sum(natoms)
    # The output of VIRIAL, FORCE, and VELOCITY are controlled by INPUT parameters dump_virial, dump_force, and dump_vel, respectively.
    # So the search of keywords can determine whether these datas are printed into MD_dump.
    calc_stress = False
    calc_force = False
    check_line = 6
    if "VIRIAL" in dumplines[6]:
        calc_stress = True
        check_line = 10
    assert (
        "POSITION" in dumplines[check_line]
    ), "keywords 'POSITION' cannot be found in the 6th line. Please check."
    if "FORCE" in dumplines[check_line]:
        calc_force = True

    nframes_dump = -1
    if calc_stress:
        nframes_dump = int(nlines / (total_natoms + 13))
    else:
        nframes_dump = int(nlines / (total_natoms + 9))
    assert nframes_dump > 0, (
        "Number of lines in MD_dump file = %d. Number of atoms = %d. The MD_dump file is incomplete."
        % (nlines, total_natoms)
    )
    cells = np.zeros([nframes_dump, 3, 3])
    stresses = np.zeros([nframes_dump, 3, 3])
    forces = np.zeros([nframes_dump, total_natoms, 3])
    coords = np.zeros([nframes_dump, total_natoms, 3])
    iframe = 0
    for iline in range(nlines):
        if "MDSTEP" in dumplines[iline]:
            # read in LATTICE_CONSTANT
            # for abacus version >= v3.1.4, the unit is angstrom, and "ANGSTROM" is added at the end
            # for abacus version <  v3.1.4, the unit is bohr
            celldm = float(dumplines[iline + 1].split()[1])
            newversion = True
            if "Angstrom" not in dumplines[iline + 1]:
                celldm *= bohr2ang  # transfer unit to ANGSTROM
                newversion = False

            # read in LATTICE_VECTORS
            for ix in range(3):
                cells[iframe, ix] = (
                    np.array([float(i) for i in dumplines[iline + 3 + ix].split()[0:3]])
                    * celldm
                )
                if calc_stress:
                    stresses[iframe, ix] = np.array(
                        [float(i) for i in dumplines[iline + 7 + ix].split()[0:3]]
                    )

            if calc_stress:
                skipline = 11
            else:
                skipline = 7

            for iat in range(total_natoms):
                # INDEX    LABEL    POSITION (Angstrom)    FORCE (eV/Angstrom)    VELOCITY (Angstrom/fs)
                # 0  Sn  0.000000000000  0.000000000000  0.000000000000  -0.000000000000  -0.000000000001  -0.000000000001  0.001244557166  -0.000346684288  0.000768457739
                # 1  Sn  0.000000000000  3.102800034079  3.102800034079  -0.000186795145  -0.000453823768  -0.000453823768  0.000550996187  -0.000886442775  0.001579501983
                # for abacus version >= v3.1.4, the value of POSITION is the real cartessian position, and unit is angstrom, and if cal_force the VELOCITY is added at the end.
                # for abacus version < v3.1.4, the real position = POSITION * celldm
                coords[iframe, iat] = np.array(
                    [float(i) for i in dumplines[iline + skipline + iat].split()[2:5]]
                )

                if not newversion:
                    coords[iframe, iat] *= celldm

                if calc_force:
                    forces[iframe, iat] = np.array(
                        [
                            float(i)
                            for i in dumplines[iline + skipline + iat].split()[5:8]
                        ]
                    )
            iframe += 1
    assert iframe == nframes_dump, (
        "iframe=%d, nframe_dump=%d. Number of frames does not match number of lines in MD_dump."
        % (iframe, nframes_dump)
    )
    stresses *= kbar2evperang3
    return coords, cells, forces, stresses




[docs]
def get_energy(outlines, ndump, dump_freq):
    energy = []
    nenergy = 0
    for line_idx, line in enumerate(outlines):
        if "final etot is" in line:
            if nenergy % dump_freq == 0:
                energy.append(float(line.split()[-2]))
            nenergy += 1
        elif "!! convergence has not been achieved" in line:
            if nenergy % dump_freq == 0:
                energy.append(np.nan)
            nenergy += 1
    assert ndump == len(energy), (
        "Number of total energies in running_md.log = %d. Number of frames in MD_dump = %d. Please check."
        % (len(energy), ndump)
    )
    energy = np.array(energy)
    return energy




[docs]
def get_frame(fname):
    if isinstance(fname, str):
        # if the input parameter is only one string, it is assumed that it is the
        # base directory containing INPUT file;
        path_in = os.path.join(fname, "INPUT")
    else:
        raise RuntimeError("invalid input")
    with open(path_in) as fp:
        inlines = fp.read().split("\n")
    geometry_path_in = get_geometry_in(fname, inlines)  # base dir of STRU
    path_out = get_path_out(fname, inlines)

    with open(geometry_path_in) as fp:
        geometry_inlines = fp.read().split("\n")
    celldm, cell = get_cell(geometry_inlines)
    atom_names, natoms, types, coords = get_coords(
        celldm, cell, geometry_inlines, inlines
    )
    # This coords is not to be used.
    dump_freq = get_coord_dump_freq(inlines=inlines)
    # ndump = int(os.popen("ls -l %s | grep 'md_pos_' | wc -l" %path_out).readlines()[0])
    # number of dumped geometry files
    # coords = get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell)
    with open(os.path.join(path_out, "MD_dump")) as fp:
        dumplines = fp.read().split("\n")
    coords, cells, force, stress = get_coords_from_dump(dumplines, natoms)
    ndump = np.shape(coords)[0]
    with open(os.path.join(path_out, "running_md.log")) as fp:
        outlines = fp.read().split("\n")
    energy = get_energy(outlines, ndump, dump_freq)

    unconv_stru = ""
    for i, iene in enumerate(energy):
        if np.isnan(iene):
            coords = np.delete(coords, i - ndump, axis=0)
            cells = np.delete(cells, i - ndump, axis=0)
            force = np.delete(force, i - ndump, axis=0)
            stress = np.delete(stress, i - ndump, axis=0)
            energy = np.delete(energy, i - ndump, axis=0)
            unconv_stru += "%d " % i
    ndump = len(energy)
    if unconv_stru != "":
        warnings.warn("Structure %s are unconverged and not collected!" % unconv_stru)

    for iframe in range(ndump):
        stress[iframe] *= np.linalg.det(cells[iframe, :, :].reshape([3, 3]))
    if np.sum(np.abs(stress[0])) < 1e-10:
        stress = None
    data = {}
    data["atom_names"] = atom_names
    data["atom_numbs"] = natoms
    data["atom_types"] = types
    data["cells"] = cells
    # for idx in range(ndump):
    #    data['cells'][:, :, :] = cell
    data["coords"] = coords
    data["energies"] = energy
    data["forces"] = force
    data["virials"] = stress
    if not isinstance(data["virials"], np.ndarray):
        del data["virials"]
    data["orig"] = np.zeros(3)

    return data