#!/usr/bin/env python3
import xml.etree.ElementTree as ET
import numpy as np
[docs]
def check_name(item, name):
    """Verify that the ``name`` attribute of *item* equals *name*.

    Parameters
    ----------
    item : xml.etree.ElementTree.Element
        Element whose ``attrib["name"]`` is inspected.
    name : str
        The value the attribute is required to have.

    Raises
    ------
    AssertionError
        If the attribute differs from *name*. It is raised explicitly rather
        than through an ``assert`` statement so the check is not stripped
        when Python runs with ``-O``.
    KeyError
        If *item* has no ``name`` attribute.
    """
    actual = item.attrib["name"]
    if actual != name:
        raise AssertionError(
            "item attrib '{}' does not match required '{}'".format(actual, name)
        )
[docs]
def get_varray(varray):
    """Turn the ``v`` children of *varray* into a NumPy array of floats.

    Each direct ``v`` child contributes one row: its text is split on
    whitespace and every token is converted with ``float``.

    Parameters
    ----------
    varray : xml.etree.ElementTree.Element
        Element whose ``v`` children hold whitespace-separated numbers.

    Returns
    -------
    numpy.ndarray
        One row per ``v`` child, in document order.
    """
    rows = [
        [float(token) for token in row.text.split()]
        for row in varray.findall("v")
    ]
    return np.array(rows)
[docs]
def analyze_atominfo(atominfo_xml):
    """Read element names and per-atom type indices from an ``atominfo`` element.

    The first ``array`` child must be named "atoms" (checked with
    ``check_name``). Every entry of its ``set`` child is expected to carry at
    least two ``c`` cells: the first holds the element name, the second the
    integer atom type.

    Parameters
    ----------
    atominfo_xml : xml.etree.ElementTree.Element
        The ``atominfo`` element.

    Returns
    -------
    eles : list of str
        Element name (stripped of surrounding whitespace) recorded the first
        time each atom type is seen, in order of first appearance.
    types : list of int
        Atom type of every entry, in document order.
    """
    atoms_array = atominfo_xml.find("array")
    check_name(atoms_array, "atoms")

    symbols = []
    type_ids = []
    seen = set()
    for row in atoms_array.find("set"):
        cells = row.findall("c")
        type_id = int(cells[1].text)
        # Only the first atom of each type contributes an element name.
        if type_id not in seen:
            symbols.append(cells[0].text.strip())
            seen.add(type_id)
        type_ids.append(type_id)
    return symbols, type_ids
[docs]
def analyze_calculation(cc):
    """Extract positions, cell, energy, forces and stress from one ``calculation`` element.

    Parameters
    ----------
    cc : xml.etree.ElementTree.Element
        Element with a ``structure`` child (containing a ``crystal/varray``
        named "basis" and a ``varray`` named "positions"), direct ``varray``
        children named "forces" and optionally "stress", and an ``energy``
        child whose ``i`` entries include one named "e_fr_energy".

    Returns
    -------
    tuple
        ``(posi, cell, ener, forc, strs)``. ``posi``, ``cell`` and ``forc``
        are arrays built by ``get_varray``, ``ener`` is a float, and ``strs``
        is an array or ``None`` when there is no "stress" varray.

    Raises
    ------
    AssertionError
        If the basis or positions varray does not carry the expected name.
    ValueError
        If the element has no "e_fr_energy" entry or no "forces" varray.
    """
    structure_xml = cc.find("structure")
    basis_xml = structure_xml.find("crystal").find("varray")
    positions_xml = structure_xml.find("varray")
    check_name(basis_xml, "basis")
    check_name(positions_xml, "positions")
    cell = get_varray(basis_xml)
    posi = get_varray(positions_xml)

    forc = None
    strs = None
    for vv in cc.findall("varray"):
        label = vv.attrib["name"]
        if label == "forces":
            forc = get_varray(vv)
        elif label == "stress":
            strs = get_varray(vv)

    ener = None
    for ii in cc.find("energy").findall("i"):
        if ii.attrib["name"] == "e_fr_energy":
            ener = float(ii.text)

    # Fail with a clear message instead of an UnboundLocalError when a
    # required quantity is absent from this calculation element.
    if ener is None:
        raise ValueError("calculation element has no 'e_fr_energy' entry")
    if forc is None:
        raise ValueError("calculation element has no 'forces' varray")
    return posi, cell, ener, forc, strs
[docs]
def analyze(fname, type_idx_zero=False, begin=0, step=1):
    """Collect atom info and per-frame data from an XML file that may be broken.

    The file is parsed incrementally. If a parse error occurs (for example
    because the file is truncated), everything gathered up to that point is
    returned instead of raising.

    Parameters
    ----------
    fname : str or file object
        Source handed to ``xml.etree.ElementTree.iterparse``.
    type_idx_zero : bool, optional
        If true, subtract 1 from every atom type index.
    begin : int, optional
        Index of the first ``calculation`` element to keep.
    step : int, optional
        Keep every ``step``-th ``calculation`` element counted from ``begin``.
        Must be non-zero.

    Returns
    -------
    tuple
        ``(eles, types, cells, positions, energies, forces, stresses)``.
        ``eles`` is a list of element names and the rest are NumPy arrays.
        ``eles`` and ``types`` are empty when no ``atominfo`` element was
        read. A frame contributes to ``stresses`` only if it has a stress
        entry, so that array can be shorter than the other per-frame arrays.
    """
    # Defaults so that a file without a complete ``atominfo`` element still
    # yields a well-formed (empty) result rather than an UnboundLocalError.
    eles = []
    types = np.array([], dtype=int)
    all_posi = []
    all_cell = []
    all_ener = []
    all_forc = []
    all_strs = []
    frame_idx = 0
    try:
        for _event, elem in ET.iterparse(fname):
            if elem.tag == "atominfo":
                eles, types = analyze_atominfo(elem)
                types = np.array(types, dtype=int)
                if type_idx_zero:
                    types = types - 1
            elif elem.tag == "calculation":
                posi, cell, ener, forc, strs = analyze_calculation(elem)
                if frame_idx >= begin and (frame_idx - begin) % step == 0:
                    all_posi.append(posi)
                    all_cell.append(cell)
                    all_ener.append(ener)
                    all_forc.append(forc)
                    if strs is not None:
                        all_strs.append(strs)
                frame_idx += 1
    except ET.ParseError:
        # Deliberate best effort: keep whatever was read before the file
        # became unparsable.
        pass
    return (
        eles,
        types,
        np.array(all_cell),
        np.array(all_posi),
        np.array(all_ener),
        np.array(all_forc),
        np.array(all_strs),
    )