Source code for dpdata.vasp.outcar

import re
import warnings

import numpy as np


def system_info(lines, type_idx_zero=False):
    """Extract species names, per-species atom counts, atom types and NELM.

    Parameters
    ----------
    lines : iterable of str
        Text lines of an OUTCAR block (header included).
    type_idx_zero : bool, optional
        If true, type labels start at 0; otherwise they start at 1.

    Returns
    -------
    atom_names : list of str
        Species names read from ``TITEL`` lines, truncated to the number of
        entries found on the ``ions per type`` line.
    atom_numbs : list of int
        Number of atoms of each species.
    atom_types : numpy.ndarray of int
        One type label per atom, grouped by species in order.
    nelm : int
        First ``NELM = <int>`` value found in the block.

    Raises
    ------
    AssertionError
        If NELM or the ``ions per type`` line is missing, or if several
        ``ions per type`` lines disagree with each other.
    """
    nelm_pattern = re.compile(r"NELM\s*=\s*(\d+)")
    species = []
    counts = None
    nelm = None

    for line in lines:
        if "TITEL" in line:
            # Fourth field is the POTCAR label, e.g. "Sn_d" in
            # "TITEL = PAW_PBE Sn_d 06Sep2000"; keep only the part before "_".
            potcar_label = line.split()[3]
            species.append(potcar_label.partition("_")[0])
        elif nelm is None:
            # Only the first NELM match is kept; TITEL lines are never scanned.
            found = nelm_pattern.search(line)
            if found:
                nelm = int(found.group(1))

        if "ions per type" in line:
            parsed = [int(tok) for tok in line.split()[4:]]
            if counts is None:
                counts = parsed
            else:
                assert counts == parsed, "in consistent numb atoms in OUTCAR"

    assert nelm is not None, "cannot find maximum steps for each SC iteration"
    assert counts is not None, "cannot find ion type info in OUTCAR"

    first_label = 0 if type_idx_zero else 1
    atom_types = [
        type_label
        for type_label, count in enumerate(counts, start=first_label)
        for _ in range(count)
    ]
    return species[: len(counts)], counts, np.array(atom_types, dtype=int), nelm
def get_outcar_block(fp, ml=False):
    """Read lines from ``fp`` up to and including the next energy line.

    Parameters
    ----------
    fp : iterable of str
        Open OUTCAR file (or any iterator over its lines). It is consumed
        only as far as the end of the returned block, so repeated calls walk
        through the file block by block.
    ml : bool, optional
        Select the ML energy token instead of the regular one as terminator.

    Returns
    -------
    list of str
        The collected lines with their trailing newline stripped. Empty when
        ``fp`` is exhausted.
    """
    # NOTE(review): matching is whitespace-sensitive -- confirm the spacing of
    # these tokens against real OUTCAR output.
    terminators = ("free energy TOTEN", "free energy ML TOTEN")
    stop_token = terminators[int(ml)]

    collected = []
    for raw_line in fp:
        if not raw_line:
            # A falsy item ends the block without being stored.
            break
        collected.append(raw_line.rstrip("\n"))
        if stop_token in raw_line:
            break
    return collected
# we assume that the force is printed ...
def get_frames(fname, begin=0, step=1, ml=False, convergence_check=True):
    """Parse an OUTCAR file into per-frame arrays.

    The file is read block by block with ``get_outcar_block``. System
    information is taken from the first block, and every selected block is
    parsed with ``analyze_block``.

    Parameters
    ----------
    fname : str or path-like
        Path of the OUTCAR file.
    begin : int, optional
        Index of the first block to collect.
    step : int, optional
        Collect every ``step``-th block counted from ``begin``.
    ml : bool, optional
        Forwarded to ``get_outcar_block`` (for all blocks after the first)
        and to ``analyze_block``.
    convergence_check : bool, optional
        If true, blocks reported as unconverged are skipped; otherwise they
        are collected anyway. In both cases a warning lists them.

    Returns
    -------
    tuple
        ``(atom_names, atom_numbs, atom_types, cells, coords, energies,
        forces, virials)``. ``virials`` is ``None`` when no block provided a
        virial, otherwise an array of the virials that were found.
    """
    all_coords = []
    all_cells = []
    all_energies = []
    all_forces = []
    all_virials = []
    rec_failed = []

    # The context manager guarantees the file is closed even when parsing a
    # block raises; previously the handle leaked on any exception.
    with open(fname) as fp:
        # NOTE(review): the first block is always split on the non-ML energy
        # token, whatever ``ml`` is -- confirm this is intended.
        blk = get_outcar_block(fp)
        atom_names, atom_numbs, atom_types, nelm = system_info(
            blk, type_idx_zero=True
        )
        ntot = sum(atom_numbs)

        cc = 0
        while len(blk) > 0:
            if cc >= begin and (cc - begin) % step == 0:
                coord, cell, energy, force, virial, is_converge = analyze_block(
                    blk, ntot, nelm, ml
                )
                if len(coord) == 0:
                    # A selected block without coordinates ends the scan.
                    break
                if is_converge or not convergence_check:
                    all_coords.append(coord)
                    all_cells.append(cell)
                    all_energies.append(energy)
                    all_forces.append(force)
                    # Virials are stored only when present, so this list can
                    # be shorter than the other per-frame lists.
                    if virial is not None:
                        all_virials.append(virial)
                if not is_converge:
                    # Reported with 1-based numbering.
                    rec_failed.append(cc + 1)
            blk = get_outcar_block(fp, ml)
            cc += 1

    if len(rec_failed) > 0:
        prt = (
            "so they are not collected."
            if convergence_check
            else "but they are still collected due to the requirement for ignoring convergence checks."
        )
        warnings.warn(
            f"The following structures were unconverged: {rec_failed}; " + prt
        )

    if len(all_virials) == 0:
        all_virials = None
    else:
        all_virials = np.array(all_virials)

    return (
        atom_names,
        atom_numbs,
        atom_types,
        np.array(all_cells),
        np.array(all_coords),
        np.array(all_energies),
        np.array(all_forces),
        all_virials,
    )
def analyze_block(lines, ntot, nelm, ml=False):
    """Parse coordinates, cell, energy, forces and virial from one block.

    Parameters
    ----------
    lines : list of str
        Lines of a single OUTCAR block, normally ending with the energy line.
    ntot : int
        Number of atoms; that many rows are read from the force table.
    nelm : int
        Maximum number of electronic steps. A block whose ``Iteration`` line
        reports a step count ``>= nelm`` is flagged as unconverged.
    ml : bool, optional
        If true, use the ML tokens and offsets; convergence is then never
        checked and ``is_converge`` stays ``True``.

    Returns
    -------
    coord : list of list of float
        One ``[x, y, z]`` entry per atom.
    cell : list of list of float
        Three rows of three floats.
    energy : float or None
        ``None`` if the block ends without an energy line.
    force : list of list of float
        One ``[fx, fy, fz]`` entry per atom.
    virial : numpy.ndarray or None
        Symmetric 3x3 matrix built from the six numbers on the ``in kB``
        row, taken verbatim (no unit conversion here); ``None`` if the block
        contains no virial section.
    is_converge : bool

    Raises
    ------
    ValueError
        If the energy line is reached before forces, coordinates or cell
        have been found.
    AssertionError
        If a virial section is found but no ``in kB`` row follows it.
    """
    coord = []
    cell = []
    energy = None
    force = []
    virial = None
    is_converge = True

    # Search tokens and offsets; position 0 is regular output, 1 is ML output.
    ml_index = int(ml)
    energy_token = ["free energy TOTEN", "free energy ML TOTEN"][ml_index]
    energy_column = [4, 5][ml_index]
    virial_token = ["FORCE on cell =-STRESS in cart. coord. units", "ML FORCE"][
        ml_index
    ]
    virial_offset = [14, 4][ml_index]
    cell_token = ["VOLUME and BASIS", "ML FORCE"][ml_index]
    cell_offset = [5, 12][ml_index]

    for idx, ii in enumerate(lines):
        # Convergence is only tracked for regular (non-ML) output.
        if ("Iteration" in ii) and (not ml):
            sc_index = int(ii.split()[3][:-1])
            if sc_index >= nelm:
                is_converge = False
            continue

        if energy_token in ii:
            energy = float(ii.split()[energy_column])
            if len(force) == 0:
                raise ValueError("cannot find forces in OUTCAR block")
            if len(coord) == 0:
                raise ValueError("cannot find coordinates in OUTCAR block")
            if len(cell) == 0:
                raise ValueError("cannot find cell in OUTCAR block")
            return coord, cell, energy, force, virial, is_converge

        # In ML mode the cell and the virial are announced by the same
        # "ML FORCE" line. They must be tested independently: in a single
        # elif chain the cell branch shadows the virial branch and the
        # virial is never read.
        has_cell = cell_token in ii
        has_virial = virial_token in ii

        if has_cell:
            for dd in range(3):
                tmp_l = lines[idx + cell_offset + dd]
                # Negative numbers may be fused to the previous column.
                cell.append(
                    [float(ss) for ss in tmp_l.replace("-", " -").split()[0:3]]
                )

        if has_virial:
            # Scan forward from the expected offset to the "in kB" row.
            in_kB_index = virial_offset
            while idx + in_kB_index < len(lines) and (
                not lines[idx + in_kB_index].split()[0:2] == ["in", "kB"]
            ):
                in_kB_index += 1
            assert idx + in_kB_index < len(
                lines
            ), 'ERROR: "in kB" is not found in OUTCAR. Unable to extract virial.'
            tmp_v = [float(ss) for ss in lines[idx + in_kB_index].split()[2:8]]
            # Six independent components -> symmetric matrix
            # (presumably ordered XX YY ZZ XY YZ ZX -- confirm).
            virial = np.zeros([3, 3])
            virial[0][0] = tmp_v[0]
            virial[1][1] = tmp_v[1]
            virial[2][2] = tmp_v[2]
            virial[0][1] = virial[1][0] = tmp_v[3]
            virial[1][2] = virial[2][1] = tmp_v[4]
            virial[0][2] = virial[2][0] = tmp_v[5]

        if has_cell or has_virial:
            continue

        if "TOTAL-FORCE" in ii and (("ML" in ii) == ml):
            # Atom rows start two lines below the table header.
            for jj in range(idx + 2, idx + 2 + ntot):
                info = [float(ss) for ss in lines[jj].split()]
                coord.append(info[:3])
                force.append(info[3:6])

    return coord, cell, energy, force, virial, is_converge