Source code for deepmd.cluster.local

"""Get local GPU resources."""

import os
import socket
import subprocess as sp
import sys

from deepmd.env import tf
from typing import List, Tuple, Optional


__all__ = ["get_gpus", "get_resource"]


def get_gpus() -> Optional[List[int]]:
    """Get available IDs of GPU cards at local.

    These IDs are valid when used as the TensorFlow device ID.

    The detection is executed in a separate Python interpreter (the same
    executable as the current one) which lists TensorFlow's local devices and
    prints the number of GPU devices to its standard output.

    Returns
    -------
    Optional[List[int]]
        List of available GPU IDs. Otherwise, None.

    Raises
    ------
    RuntimeError
        If the detection subprocess exits with a non-zero return code; the
        message carries the captured standard error of the subprocess.
    """
    # NOTE(review): presumably run out-of-process so that the caller does not
    # initialize TensorFlow devices itself -- confirm with the maintainers.
    test_cmd = 'from tensorflow.python.client import device_lib; ' \
               'devices = device_lib.list_local_devices(); ' \
               'gpus = [d.name for d in devices if d.device_type == "GPU"]; ' \
               'print(len(gpus))'
    with sp.Popen([sys.executable, "-c", test_cmd],
                  stderr=sp.PIPE, stdout=sp.PIPE) as p:
        stdout, stderr = p.communicate()
        if p.returncode != 0:
            # Decode leniently: an undecodable byte in the child's stderr must
            # not mask the real failure with a UnicodeDecodeError.
            decoded = stderr.decode('UTF-8', errors='replace')
            raise RuntimeError(
                'Failed to detect available GPUs due to:\n%s' % decoded)
        decoded = stdout.decode('UTF-8').strip()
        num_gpus = int(decoded)
        # Device IDs are simply 0..num_gpus-1; None signals "no GPU found".
        return list(range(num_gpus)) if num_gpus > 0 else None
def get_resource() -> Tuple[str, List[str], Optional[List[int]]]:
    """Collect the resources of the local machine.

    The node name is the host name reported by ``socket.gethostname()``, the
    node list contains only that name, and the GPU list is whatever
    ``get_gpus()`` returns.

    Returns
    -------
    Tuple[str, List[str], Optional[List[int]]]
        nodename, nodelist, and gpus
    """
    hostname = socket.gethostname()
    # A local run involves a single node, so the node list is just this host.
    return hostname, [hostname], get_gpus()