Last active
May 25, 2020 09:16
-
-
Save dizcza/c3676e89a2c00b69258498cf6337db1a to your computer and use it in GitHub Desktop.
set CUDA_VISIBLE_DEVICES env from available GPUs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess | |
import os | |
class GpuInfo(object):
    """Snapshot of one GPU's memory usage and utilization, as reported by nvidia-smi."""

    def __init__(self, index, memory_total, memory_used, gpu_load):
        """
        :param index: GPU index
        :param memory_total: total GPU memory, Mb
        :param memory_used: GPU memory already in use, Mb
        :param gpu_load: gpu utilization load, percents
        """
        self.index = int(index)
        self.memory_total = int(memory_total)
        self.memory_used = int(memory_used)
        try:
            # convert percents to a [0, 1] fraction
            self.gpu_load = int(gpu_load) / 100.
        except ValueError:
            # the driver reports a non-numeric placeholder when utilization
            # querying is unsupported; treat the GPU as idle
            self.gpu_load = 0.

    def __repr__(self):
        used_percent = 100. * self.memory_used / self.memory_total
        template = "GPU #{}: memory total={} Mb, used={} Mb ({:.1f} %), gpu.load={}"
        return template.format(self.index, self.memory_total, self.memory_used,
                               used_percent, self.gpu_load)

    def get_available_memory_portion(self):
        """
        :return: fraction of total memory currently free, in [0, 1]
        """
        memory_free = self.memory_total - self.memory_used
        return memory_free / self.memory_total
class NvidiaSmi(object):
    """Runs `nvidia-smi` once and exposes the per-GPU stats as GpuInfo objects."""

    def __init__(self):
        # csv,noheader,nounits makes every output line exactly
        # "index, memory.total, memory.used, utilization.gpu"
        command = "nvidia-smi --query-gpu=index,memory.total,memory.used,utilization.gpu --format=csv,noheader,nounits".split()
        self.gpus = []
        try:
            # context manager guarantees the pipe is closed; stderr is not
            # captured, so communicate() yields None for it
            with subprocess.Popen(command,
                                  universal_newlines=True,
                                  stdout=subprocess.PIPE) as process:
                stdout, _ = process.communicate()
            for line in stdout.splitlines():
                line = line.strip()
                if not line:
                    # be robust to stray blank lines in the driver output
                    continue
                index, memory_total, memory_used, gpu_load = line.split(', ')
                gpu = GpuInfo(index, memory_total, memory_used, gpu_load)
                self.gpus.append(gpu)
        except FileNotFoundError:
            # No GPU is detected. Try running `nvidia-smi` in a terminal.
            pass

    def get_gpus(self, min_free_memory=0., max_load=1.):
        """
        :param min_free_memory: filter GPUs with free memory no less than specified, between 0 and 1
        :param max_load: max gpu utilization load, between 0 and 1
        :return: list of available GpuInfo's
        """
        gpus = [gpu for gpu in self.gpus if gpu.get_available_memory_portion() >= min_free_memory and
                gpu.gpu_load <= max_load]
        return gpus
def set_cuda_visible_devices(limit_devices=int(1e9), min_free_memory=0.4, max_load=0.6) -> list:
    """
    Automatically sets CUDA_VISIBLE_DEVICES env to first `limit_devices` available GPUs with least used memory.

    :param limit_devices: limit available GPU devices to use
    :param min_free_memory: filter GPUs with free memory no less than specified, between 0 and 1
    :param max_load: max gpu utilization load, between 0 and 1
    :return: the selected GpuInfo objects, ordered by most free memory first
    """
    candidates = NvidiaSmi().get_gpus(min_free_memory, max_load)
    # prefer the devices with the largest share of free memory
    selected = sorted(candidates,
                      key=lambda gpu: gpu.get_available_memory_portion(),
                      reverse=True)[:limit_devices]
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(gpu.index) for gpu in selected)
    print("'CUDA_VISIBLE_DEVICES' is set to '{}'".format(os.environ["CUDA_VISIBLE_DEVICES"]))
    return selected
Author
dizcza
commented
Nov 2, 2017
@dizcza
os.environ["CUDA_VISIBLE_DEVICES"] = ', '.join(gpus_id)
to
os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(gpus_id)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment