Source code for dpdata.abacus.scf

import os
import re
import warnings

import numpy as np

from ..unit import EnergyConversion, LengthConversion, PressureConversion

# Unit-conversion factors, evaluated once at import time.
# bohr -> angstrom; applied to the STRU lattice constant.
bohr2ang = LengthConversion("bohr", "angstrom").value()
# rydberg -> eV.
ry2ev = EnergyConversion("rydberg", "eV").value()
# kbar -> eV/angstrom^3; applied to the stress tensor read from the log.
kbar2evperang3 = PressureConversion("kbar", "eV/angstrom^3").value()

# Section keywords of an ABACUS STRU file. get_stru_block stops collecting
# a section when it meets a line that (after comment stripping) equals one
# of these.
ABACUS_STRU_KEYS = [
    "ATOMIC_SPECIES",
    "NUMERICAL_ORBITAL",
    "LATTICE_CONSTANT",
    "LATTICE_VECTORS",
    "ATOMIC_POSITIONS",
    "NUMERICAL_DESCRIPTOR",
    "PAW_FILES",
]


def CheckFile(ifile):
    """Report whether *ifile* is an existing regular file.

    Parameters
    ----------
    ifile : str
        Path to test.

    Returns
    -------
    bool
        True when the path is a file. Otherwise a notice is printed to
        stdout and False is returned.
    """
    exists = os.path.isfile(ifile)
    if not exists:
        print("Can not find file %s" % ifile)
    return exists
def get_block(lines, keyword, skip=0, nlines=None):
    """Return the non-blank lines that follow the first line containing *keyword*.

    Parameters
    ----------
    lines : list of str
        Text to search, one entry per line.
    keyword : str
        Substring that marks the header line.
    skip : int, optional
        Number of lines to skip immediately after the header line.
    nlines : int, optional
        Maximum number of lines to collect. ``None`` or ``0`` means
        "no limit".

    Returns
    -------
    list of str or None
        The collected lines (blank and whitespace-only lines are dropped),
        or ``None`` when no line contains *keyword*. An empty list is
        returned when the header is found but nothing follows it.
    """
    limit = nlines if nlines else float("inf")
    for idx, line in enumerate(lines):
        if keyword not in line:
            continue
        block = []
        # Slicing (instead of indexing) keeps us inside the list even when
        # the header is the last line or ``skip`` runs past the end.
        for candidate in lines[idx + 1 + skip :]:
            if len(block) >= limit:
                break
            # str.strip() detects blank lines; re.split never yields an
            # empty list, so it cannot be used for this test.
            if not candidate.strip():
                continue
            block.append(candidate)
        return block
    return None
def get_stru_block(lines, keyword):
    """Collect the body of the *keyword* section of a STRU file.

    Everything after a ``#`` on a line is treated as a comment and
    removed. Lines that are empty after comment removal are skipped, and
    collection of a section stops at the next line that equals one of
    ``ABACUS_STRU_KEYS``.

    Parameters
    ----------
    lines : list of str
        Lines of the STRU file.
    keyword : str
        Section keyword; a line matches when it equals *keyword* after
        comment removal and stripping.

    Returns
    -------
    list of str or None
        Comment-free (but not stripped) body lines, or ``None`` when the
        keyword never occurs. If the keyword occurs more than once, the
        bodies of all occurrences are concatenated.
    """

    def strip_comment(text):
        return text.split("#", 1)[0]

    cleaned = [strip_comment(raw) for raw in lines]
    collected = []
    seen = False
    for pos, head in enumerate(cleaned):
        if head.strip() != keyword:
            continue
        seen = True
        for body in cleaned[pos + 1 :]:
            token = body.strip()
            if not token:
                continue
            if token in ABACUS_STRU_KEYS:
                break
            collected.append(body)
    return collected if seen else None
def get_geometry_in(fname, inlines):
    """Return the path of the structure file referenced by an INPUT file.

    Parameters
    ----------
    fname : str
        Directory that holds the calculation.
    inlines : list of str
        Lines of the INPUT file.

    Returns
    -------
    str
        ``fname`` joined with the value of the first ``stru_file`` entry,
        or with ``"STRU"`` when no such entry exists.
    """
    for entry in inlines:
        if "stru_file" not in entry:
            continue
        fields = entry.split()
        # Only an entry whose first token is exactly the key counts.
        if fields[0] == "stru_file":
            return os.path.join(fname, fields[1])
    return os.path.join(fname, "STRU")
def get_path_out(fname, inlines):
    """Return the path of the SCF log file for a calculation directory.

    Parameters
    ----------
    fname : str
        Directory that holds the calculation.
    inlines : list of str
        Lines of the INPUT file.

    Returns
    -------
    str
        ``<fname>/OUT.<suffix>/running_scf.log`` where ``<suffix>`` is the
        value of the first ``suffix`` entry, or ``ABACUS`` when there is
        no such entry.
    """
    for entry in inlines:
        if "suffix" not in entry:
            continue
        fields = entry.split()
        # Only an entry whose first token is exactly the key counts.
        if fields[0] == "suffix":
            return os.path.join(fname, "OUT.%s/running_scf.log" % fields[1])
    return os.path.join(fname, "OUT.ABACUS/running_scf.log")
def get_cell(geometry_inlines):
    """Read the lattice constant and the cell matrix from STRU lines.

    Parameters
    ----------
    geometry_inlines : list of str
        Lines of the STRU file.

    Returns
    -------
    celldm : float
        First number of the ``LATTICE_CONSTANT`` section multiplied by
        ``bohr2ang`` (the file stores it in Bohr).
    cell : numpy.ndarray
        3x3 array built from the first three numbers of the first three
        ``LATTICE_VECTORS`` lines, scaled by ``celldm``.
    """
    vector_rows = get_stru_block(geometry_inlines, "LATTICE_VECTORS")
    constant_rows = get_stru_block(geometry_inlines, "LATTICE_CONSTANT")
    celldm = float(constant_rows[0].split()[0]) * bohr2ang
    unscaled = np.array(
        [[float(tok) for tok in vector_rows[row].split()[0:3]] for row in range(3)]
    )
    return celldm, celldm * unscaled
def get_coords(celldm, cell, geometry_inlines, inlines=None):
    """Parse the ``ATOMIC_POSITIONS`` section of a STRU file.

    The section is read as: one line with the coordinate type, then for
    every species a label line, one line that is skipped, a line with the
    number of atoms, and that many coordinate lines.

    Parameters
    ----------
    celldm : float
        Scale factor applied to ``cartesian`` coordinates.
    cell : numpy.ndarray
        Cell matrix; ``direct`` coordinates are multiplied by it.
    geometry_inlines : list of str
        Lines of the STRU file.
    inlines : list of str, optional
        Not used by this function.

    Returns
    -------
    atom_names : list of str
        Species labels in file order.
    atom_numbs : list of int
        Number of atoms of each species.
    atom_types : numpy.ndarray
        Species index of every atom.
    coords : numpy.ndarray
        Converted coordinates, one row per atom.

    Raises
    ------
    RuntimeError
        If an atom is read while the coordinate type is neither
        ``cartesian`` nor ``direct`` (case-insensitive).
    """
    # Assumes ATOMIC_POSITIONS is at the bottom of the STRU file.
    block = get_stru_block(geometry_inlines, "ATOMIC_POSITIONS")
    coord_type = block[0].split()[0].lower()
    n_species = get_nele_from_stru(geometry_inlines)

    atom_names, atom_numbs, atom_types, coords = [], [], [], []
    cursor = 1  # first species label line
    for species_idx in range(n_species):
        atom_names.append(block[cursor].split()[0])
        # The line between the label and the atom count is not read.
        count = int(block[cursor + 2].split()[0])
        atom_numbs.append(count)
        cursor += 3
        for _ in range(count):
            xyz = np.array([float(tok) for tok in block[cursor].split()[0:3]])
            if coord_type == "cartesian":
                xyz = xyz * celldm
            elif coord_type == "direct":
                xyz = np.matmul(xyz, cell)
            else:
                print("coord_type = %s" % coord_type)
                raise RuntimeError(
                    "Input coordination type is invalid.\n Only direct and cartesian are accepted."
                )
            coords.append(xyz)
            atom_types.append(species_idx)
            cursor += 1

    return atom_names, atom_numbs, np.array(atom_types), np.array(coords)
def get_energy(outlines):
    """Extract the final total energy from an SCF log.

    The log is scanned from the end; the first marker met decides the
    result.

    Parameters
    ----------
    outlines : list of str
        Lines of the log file.

    Returns
    -------
    energy : float or None
        Second-to-last token of the ``final etot is`` line, or ``None``
        when the run did not converge or no marker is present.
    converged : bool
        True only when a ``final etot is`` line was found before any
        non-convergence message.
    """
    failure_markers = (
        "convergence has NOT been achieved!",
        "convergence has not been achieved",
    )
    for entry in reversed(outlines):
        if "final etot is" in entry:
            return float(entry.split()[-2]), True  # in eV
        if any(marker in entry for marker in failure_markers):
            return None, False
    return None, False
def collect_force(outlines):
    """Collect every force table found in an SCF log.

    A table starts within the 10 lines beginning at a line containing
    ``TOTAL-FORCE (eV/Angstrom)`` and consists of consecutive lines of the
    form ``<label><index> fx fy fz``.

    Parameters
    ----------
    outlines : list of str
        Lines of the log file.

    Returns
    -------
    list
        One entry per table, each a list of ``[fx, fy, fz]`` float
        triples. Empty when no table is found.

    Warns
    -----
    UserWarning
        When a header is found but no force line follows within the
        search window; scanning stops at that point and the tables
        collected so far are returned.
    """
    # Compiled once instead of once per header.
    value_pattern = re.compile(
        r"^\s*[A-Z][a-z]?[1-9][0-9]*\s+[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\s+[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\s+[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\s*$"
    )
    total = len(outlines)
    force = []
    for i, line in enumerate(outlines):
        if "TOTAL-FORCE (eV/Angstrom)" not in line:
            continue
        # Look for the first force line in lines i .. i+9, never past the
        # end of the log.
        search_end = min(i + 10, total)
        j = i
        while j < search_end and not value_pattern.match(outlines[j]):
            j += 1
        if j >= search_end:
            warnings.warn("Warning: can not find the first line of force")
            break
        frame = []
        # The bounds check lets a table end exactly at the last log line.
        while j < total and value_pattern.match(outlines[j]):
            frame.append([float(tok) for tok in outlines[j].split()[1:4]])
            j += 1
        force.append(frame)
    return force
def get_force(outlines, natoms):
    """Return the last force table of an SCF log.

    Parameters
    ----------
    outlines : list of str
        Lines of the log file.
    natoms : object
        Not used by this function.

    Returns
    -------
    numpy.ndarray or list
        Array built from the last table returned by ``collect_force``, or
        the list ``[[]]`` when the log contains no force table.
    """
    frames = collect_force(outlines)
    if not frames:
        return [[]]
    # Only the last table is of interest.
    return np.array(frames[-1])
def collect_stress(outlines):
    """Collect every stress table found in an SCF log.

    A table starts within the 10 lines beginning at a line containing
    ``TOTAL-STRESS (KBAR)`` and consists of consecutive lines holding
    exactly three numbers.

    Parameters
    ----------
    outlines : list of str
        Lines of the log file.

    Returns
    -------
    list
        One entry per table, each a list of three-float rows, with the
        values as written in the log. Empty when no table is found.

    Warns
    -----
    UserWarning
        When a header is found but no stress line follows within the
        search window; scanning stops at that point and the tables
        collected so far are returned.
    """
    # Compiled once instead of once per header.
    value_pattern = re.compile(
        r"^\s*[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\s+[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\s+[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?\s*$"
    )
    total = len(outlines)
    stress = []
    for i, line in enumerate(outlines):
        if "TOTAL-STRESS (KBAR)" not in line:
            continue
        # Look for the first stress line in lines i .. i+9, never past the
        # end of the log.
        search_end = min(i + 10, total)
        j = i
        while j < search_end and not value_pattern.match(outlines[j]):
            j += 1
        if j >= search_end:
            warnings.warn("Warning: can not find the first line of stress")
            break
        frame = []
        # The bounds check lets a table end exactly at the last log line.
        while j < total and value_pattern.match(outlines[j]):
            frame.append([float(tok) for tok in outlines[j].split()[0:3]])
            j += 1
        stress.append(frame)
    return stress
def get_stress(outlines):
    """Return the last stress table of an SCF log, unit-converted.

    Parameters
    ----------
    outlines : list of str
        Lines of the log file.

    Returns
    -------
    numpy.ndarray or None
        The last table returned by ``collect_stress`` multiplied by
        ``kbar2evperang3``, or ``None`` when the log has no stress table.
    """
    frames = collect_stress(outlines)
    if not frames:
        return None
    # Only the last table is of interest.
    return np.array(frames[-1]) * kbar2evperang3
def get_frame(fname):
    """Load one labeled frame from an ABACUS SCF calculation directory.

    Reads ``<fname>/INPUT``, the structure file it refers to, and the
    ``running_scf.log`` of the run.

    Parameters
    ----------
    fname : str
        Directory that contains the INPUT file.

    Returns
    -------
    dict
        Always contains ``atom_names``, ``atom_numbs``, ``atom_types``,
        ``cells``, ``coords``, ``energies`` and ``forces``.

        * If INPUT, the structure file or the log is missing, all of
          them are empty lists.
        * If the run did not converge, only the three ``atom_*`` entries
          are filled.
        * Otherwise ``cells``, ``coords``, ``energies`` and ``forces``
          carry a leading frame axis of length 1, ``orig`` is a zero
          vector, and ``virials`` (stress times cell volume) is present
          when the log holds a stress table. If the log has no force
          table, ``forces`` has shape ``(1, 0, 3)``.

    Raises
    ------
    RuntimeError
        If *fname* is not a string.
    """
    data = {
        "atom_names": [],
        "atom_numbs": [],
        "atom_types": [],
        "cells": [],
        "coords": [],
        "energies": [],
        "forces": [],
    }

    if not isinstance(fname, str):
        raise RuntimeError("invalid input")
    # A single string is taken to be the directory holding the INPUT file.
    path_in = os.path.join(fname, "INPUT")
    if not CheckFile(path_in):
        return data

    with open(path_in) as fp:
        inlines = fp.read().split("\n")

    geometry_path_in = get_geometry_in(fname, inlines)
    path_out = get_path_out(fname, inlines)
    if not (CheckFile(geometry_path_in) and CheckFile(path_out)):
        return data

    with open(geometry_path_in) as fp:
        geometry_inlines = fp.read().split("\n")
    with open(path_out) as fp:
        outlines = fp.read().split("\n")

    celldm, cell = get_cell(geometry_inlines)
    atom_names, natoms, types, coords = get_coords(
        celldm, cell, geometry_inlines, inlines
    )
    data["atom_names"] = atom_names
    data["atom_numbs"] = natoms
    data["atom_types"] = types

    energy, converge = get_energy(outlines)
    if not converge:
        return data

    # get_force returns the plain list [[]] when the log has no force
    # table; indexing that list with np.newaxis raises TypeError, so it is
    # normalised to an empty (0, 3) array first.
    force = np.asarray(get_force(outlines, natoms), dtype=float)
    if force.size == 0:
        warnings.warn("no force table found in %s" % path_out)
        force = force.reshape(0, 3)

    stress = get_stress(outlines)
    if stress is not None:
        # Scale the stress by the cell volume to obtain the virial.
        stress *= np.abs(np.linalg.det(cell))

    data["cells"] = cell[np.newaxis, :, :]
    data["coords"] = coords[np.newaxis, :, :]
    data["energies"] = np.array(energy)[np.newaxis]
    data["forces"] = force[np.newaxis, :, :]
    if stress is not None:
        data["virials"] = stress[np.newaxis, :, :]
    data["orig"] = np.zeros(3)
    return data
def get_nele_from_stru(geometry_inlines):
    """Count the species listed in the ``ATOMIC_SPECIES`` section.

    Section boundaries are the lines whose first token is one of the
    known STRU keywords. Inside every ``ATOMIC_SPECIES`` section a line
    is counted when splitting it on runs of whitespace with ``re.split``
    yields at least three pieces (leading or trailing whitespace
    contributes an empty piece).

    Parameters
    ----------
    geometry_inlines : list of str
        Lines of the STRU file.

    Returns
    -------
    int
        Number of counted species lines.

    Raises
    ------
    AssertionError
        If no section keyword is found at all.
    """
    section_names = (
        "ATOMIC_SPECIES",
        "NUMERICAL_ORBITAL",
        "LATTICE_CONSTANT",
        "LATTICE_VECTORS",
        "ATOMIC_POSITIONS",
        "NUMERICAL_DESCRIPTOR",
    )

    # (keyword, line index) for every section header, in file order.
    headers = []
    for lineno, text in enumerate(geometry_inlines):
        tokens = text.split()
        if tokens and tokens[0] in section_names:
            headers.append((tokens[0], lineno))
    assert len(headers) > 0

    # A section ends where the next one starts; the last ends at EOF.
    ends = [lineno for _, lineno in headers[1:]] + [len(geometry_inlines)]

    nele = 0
    for (name, start), stop in zip(headers, ends):
        if name != "ATOMIC_SPECIES":
            continue
        nele += sum(
            1
            for text in geometry_inlines[start + 1 : stop]
            if len(re.split(r"\s+", text)) >= 3
        )
    return nele
def get_frame_from_stru(fname):
    """Load an unlabeled frame from a single STRU file.

    Parameters
    ----------
    fname : str
        Path of the STRU file.

    Returns
    -------
    dict
        ``atom_names``, ``atom_numbs``, ``atom_types``, ``cells`` and
        ``coords`` (the last two with a leading frame axis of length 1),
        plus ``orig`` set to a zero vector.

    Raises
    ------
    AssertionError
        If *fname* is not a string.
    """
    assert isinstance(fname, str)
    with open(fname) as handle:
        stru_lines = handle.read().split("\n")

    # Stand-in for the INPUT lines that get_coords accepts.
    fake_input = ["ntype %d" % get_nele_from_stru(stru_lines)]
    celldm, cell = get_cell(stru_lines)
    names, counts, types, positions = get_coords(
        celldm, cell, stru_lines, fake_input
    )
    return {
        "atom_names": names,
        "atom_numbs": counts,
        "atom_types": types,
        "cells": cell[np.newaxis, :, :],
        "coords": positions[np.newaxis, :, :],
        "orig": np.zeros(3),
    }
def make_unlabeled_stru(
    data,
    frame_idx,
    pp_file=None,
    numerical_orbital=None,
    numerical_descriptor=None,
    mass=None,
):
    """Render one frame of *data* as the text of an ABACUS STRU file.

    Parameters
    ----------
    data : dict
        Must provide ``atom_names``, ``atom_numbs``, ``atom_types``,
        ``cells`` and ``coords``; ``cells`` and ``coords`` are indexed by
        frame first.
    frame_idx : int
        Index of the frame to write.
    pp_file : sequence of str, optional
        One pseudopotential file name per species; when omitted the
        species lines end right after the mass field.
    numerical_orbital : sequence of str, optional
        One orbital file name per species; adds a ``NUMERICAL_ORBITAL``
        section.
    numerical_descriptor : str, optional
        Adds a ``NUMERICAL_DESCRIPTOR`` section with this value.
    mass : sequence of float, optional
        One mass per species, written with three decimals; ``1`` is
        written for every species when omitted.

    Returns
    -------
    str
        The STRU text. The lattice constant is written as
        ``1 / bohr2ang`` and cell vectors and positions are written
        unchanged from *data*, with positions in ``Cartesian`` mode.

    Raises
    ------
    AssertionError
        If ``numerical_orbital`` does not have one entry per species or
        ``numerical_descriptor`` is not a string.
    """
    names = data["atom_names"]
    counts = data["atom_numbs"]

    # Pieces are gathered in a list and joined once at the end.
    parts = ["ATOMIC_SPECIES\n"]
    for iele, name in enumerate(names):
        parts.append(name + " ")
        parts.append("%.3f " % mass[iele] if mass is not None else "1 ")
        parts.append("%s\n" % pp_file[iele] if pp_file is not None else "\n")
    parts.append("\n")

    if numerical_orbital is not None:
        assert len(numerical_orbital) == len(names)
        parts.append("NUMERICAL_ORBITAL\n")
        for orbital in numerical_orbital:
            parts.append("%s\n" % orbital)
        parts.append("\n")

    if numerical_descriptor is not None:
        assert isinstance(numerical_descriptor, str)
        parts.append("NUMERICAL_DESCRIPTOR\n%s\n" % numerical_descriptor)
        parts.append("\n")

    parts.append("LATTICE_CONSTANT\n")
    parts.append(str(1 / bohr2ang) + "\n\n")

    parts.append("LATTICE_VECTORS\n")
    cell = data["cells"][frame_idx]
    for ix in range(3):
        for iy in range(3):
            parts.append(str(cell[ix][iy]) + " ")
        parts.append("\n")
    parts.append("\n")

    parts.append("ATOMIC_POSITIONS\n")
    parts.append("Cartesian # Cartesian(Unit is LATTICE_CONSTANT)\n")
    frame_coords = data["coords"][frame_idx]
    natom_tot = 0
    for iele, name in enumerate(names):
        parts.append(name + "\n")
        parts.append("0.0\n")
        parts.append(str(counts[iele]) + "\n")
        # Indices of the atoms of this species, computed once per species
        # rather than once per atom.
        members = np.nonzero(data["atom_types"] == iele)[0]
        for iatom in range(counts[iele]):
            atom_idx = members[iatom]
            parts.append(
                "%.12f %.12f %.12f %d %d %d\n"
                % (
                    frame_coords[atom_idx, 0],
                    frame_coords[atom_idx, 1],
                    frame_coords[atom_idx, 2],
                    1,
                    1,
                    1,
                )
            )
            natom_tot += 1
    assert natom_tot == sum(counts)
    return "".join(parts)
# if __name__ == "__main__": # path = "/home/lrx/work/12_ABACUS_dpgen_interface/dpdata/dpdata/tests/abacus.scf" # data = get_frame(path)