Source code for dpgen.tools.stat_time

import glob
import os
import re
import subprocess


# A plain decimal number, optionally signed and with an exponent.
_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
# e.g. "# DEEPMD: wall time: 7960.265 s"
_TRAIN_WALL_TIME = re.compile(r"wall time:\s*(" + _NUMBER + r")")
# e.g. "Total wall time: 0:00:39"
_LAMMPS_WALL_TIME = re.compile(r"wall time:\s*(\d+):(\d+):(\d+)")
# e.g. " Total CPU time used (sec):      288.395"
_VASP_CPU_TIME = re.compile(r"CPU time[^:]*:\s*(" + _NUMBER + r")")
# e.g. " running on   16 total cores"
_VASP_CORES = re.compile(r"running on\s+(\d+)")


def _grep(path, needle, max_lines=None):
    """Return the lines of *path* containing *needle* (first *max_lines* only)."""
    hits = []
    try:
        # errors="replace" mirrors ``grep --text``: binary junk must not abort the scan.
        with open(path, encoding="utf-8", errors="replace") as handle:
            for line_no, line in enumerate(handle):
                if max_lines is not None and line_no >= max_lines:
                    break
                if needle in line:
                    hits.append(line.rstrip("\n"))
    except OSError:
        # An unreadable log is treated like a log without a timing line.
        return []
    return hits


def _train_seconds(line):
    """Parse the wall time (seconds) from a DeePMD-kit ``train.log`` line."""
    match = _TRAIN_WALL_TIME.search(line)
    return float(match.group(1)) if match else None


def _lammps_seconds(line):
    """Parse the ``h:mm:ss`` wall time from a ``log.lammps`` line into seconds."""
    match = _LAMMPS_WALL_TIME.search(line)
    if match is None:
        return None
    hours, minutes, seconds = (int(part) for part in match.groups())
    return float(3600 * hours + 60 * minutes + seconds)


def _vasp_seconds(line):
    """Parse the CPU time (seconds) from an ``OUTCAR`` line."""
    match = _VASP_CPU_TIME.search(line)
    return float(match.group(1)) if match else None


def _vasp_cores(outcar_path):
    """Return the core count announced in the first 1000 lines of an ``OUTCAR``."""
    for line in _grep(outcar_path, "running on", max_lines=1000):
        match = _VASP_CORES.search(line)
        if match:
            return int(match.group(1))
    # No "running on N ..." header found: fall back to a single core.
    return 1


def _report_stage(
    target_folder, stage_name, task_glob, log_name, needle, parse_seconds, count_cores=None
):
    """Print one summary line per ``iter.??????/<stage_name>`` directory."""
    stage_pattern = os.path.join(
        glob.escape(str(target_folder)), "iter.??????", stage_name
    )
    stage_dirs = sorted(p for p in glob.glob(stage_pattern) if os.path.isdir(p))
    for stage_dir in stage_dirs:
        escaped_dir = glob.escape(stage_dir)
        upload_task_dir_num = len(glob.glob(os.path.join(escaped_dir, task_glob)))
        log_files = sorted(glob.glob(os.path.join(escaped_dir, task_glob, log_name)))

        # Training and model_devi jobs are assumed to run on a single GPU.
        paral_cores = 1
        total_core_sec = 0.0
        finished_task_file_num = 0
        for log_file in log_files:
            for line in _grep(log_file, needle):
                seconds = parse_seconds(line)
                if seconds is None:
                    # Line mentions the keyword but carries no parsable time.
                    continue
                if count_cores is not None:
                    paral_cores = count_cores(log_file)
                total_core_sec += seconds * paral_cores
                finished_task_file_num += 1

        total_core_hour = total_core_sec / 3600
        print(
            f"{os.path.basename(stage_dir)}:{os.path.abspath(stage_dir)}"
            f":paral_cores:{paral_cores}"
            f":upload_task_dir_num:{upload_task_dir_num}"
            f":finished_task_file_num:{finished_task_file_num}"
            f":total_core_hour:{total_core_hour:.3f}"
        )


def stat_time(target_folder, param_file="param.json", verbose=True, mute=False):
    """Print the core-hours consumed by every stage of every iteration.

    For each ``iter.??????`` directory under *target_folder* one line is
    printed per stage, in the order ``00.train``, ``01.model_devi``,
    ``02.fp``::

        <stage>:<abs dir>:paral_cores:<n>:upload_task_dir_num:<n>:finished_task_file_num:<n>:total_core_hour:<h>

    Timings are read from ``???/train.log`` ("wall time" in seconds),
    ``task.*/log.lammps`` ("wall time" as ``h:mm:ss``) and ``task.*/OUTCAR``
    ("CPU time" in seconds, multiplied by the core count taken from the
    "running on N" header line).

    Parameters
    ----------
    target_folder : str
        Folder containing the ``iter.??????`` directories. It is taken
        literally; glob characters in it are not expanded.
    param_file : str, optional
        Accepted for interface compatibility; not used.
    verbose : bool, optional
        Accepted for interface compatibility; not used.
    mute : bool, optional
        Accepted for interface compatibility; not used.

    Notes
    -----
    Stage directories without any finished log are reported with
    ``finished_task_file_num:0`` instead of raising. Files are read directly,
    so no shell command is built from *target_folder*.
    """
    _report_stage(
        target_folder, "00.train", "???", "train.log", "wall time", _train_seconds
    )
    _report_stage(
        target_folder,
        "01.model_devi",
        "task.*",
        "log.lammps",
        "wall time",
        _lammps_seconds,
    )
    _report_stage(
        target_folder, "02.fp", "task.*", "OUTCAR", "CPU time", _vasp_seconds, _vasp_cores
    )
if __name__ == "__main__":
    # Executed as a script: report timings for the current working directory.
    stat_time(target_folder="./")