@dizcza
Last active May 25, 2020 09:16
Set the CUDA_VISIBLE_DEVICES environment variable from the available GPUs
import subprocess
import os


class GpuInfo(object):
    def __init__(self, index, memory_total, memory_used, gpu_load):
        """
        :param index: GPU index
        :param memory_total: total GPU memory, MiB
        :param memory_used: GPU memory already in use, MiB
        :param gpu_load: GPU utilization load, percent
        """
        self.index = int(index)
        self.memory_total = int(memory_total)
        self.memory_used = int(memory_used)
        try:
            self.gpu_load = int(gpu_load) / 100.
        except ValueError:
            # GPU utilization load is not supported by the current driver
            self.gpu_load = 0.

    def __repr__(self):
        return "GPU #{}: memory total={} MiB, used={} MiB ({:.1f} %), gpu.load={}".format(
            self.index, self.memory_total, self.memory_used,
            100. * self.memory_used / self.memory_total, self.gpu_load)

    def get_available_memory_portion(self):
        return (self.memory_total - self.memory_used) / self.memory_total


class NvidiaSmi(object):
    def __init__(self):
        command = "nvidia-smi --query-gpu=index,memory.total,memory.used,utilization.gpu --format=csv,noheader,nounits".split()
        self.gpus = []
        try:
            process = subprocess.Popen(command,
                                       universal_newlines=True,
                                       stdout=subprocess.PIPE)
            stdout, stderr_ignored = process.communicate()
            for line in stdout.splitlines():
                index, memory_total, memory_used, gpu_load = line.split(', ')
                gpu = GpuInfo(index, memory_total, memory_used, gpu_load)
                self.gpus.append(gpu)
        except FileNotFoundError:
            # No GPU is detected. Try running `nvidia-smi` in a terminal.
            pass

    def get_gpus(self, min_free_memory=0., max_load=1.):
        """
        :param min_free_memory: keep GPUs whose free memory portion is no less than this value, between 0 and 1
        :param max_load: max GPU utilization load, between 0 and 1
        :return: list of available GpuInfo's
        """
        gpus = [gpu for gpu in self.gpus
                if gpu.get_available_memory_portion() >= min_free_memory
                and gpu.gpu_load <= max_load]
        return gpus


def set_cuda_visible_devices(limit_devices=int(1e9), min_free_memory=0.4, max_load=0.6) -> list:
    """
    Automatically sets the CUDA_VISIBLE_DEVICES env variable to the first `limit_devices`
    available GPUs with the largest portion of free memory.

    :param limit_devices: limit the number of GPU devices to use
    :param min_free_memory: keep GPUs whose free memory portion is no less than this value, between 0 and 1
    :param max_load: max GPU utilization load, between 0 and 1
    :return: list of selected GpuInfo's
    """
    gpus = NvidiaSmi().get_gpus(min_free_memory, max_load)
    gpus.sort(key=lambda gpu: gpu.get_available_memory_portion(), reverse=True)
    limit_devices = min(limit_devices, len(gpus))
    gpus = gpus[:limit_devices]
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(gpu.index) for gpu in gpus)
    print("'CUDA_VISIBLE_DEVICES' is set to '{}'".format(os.environ["CUDA_VISIBLE_DEVICES"]))
    return gpus
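
A minimal usage sketch follows. It assumes the snippet above is saved as a module named gpu_setup.py and that PyTorch is installed; both the module name and the PyTorch calls are illustrative, and any CUDA-aware framework works the same way. The key point is to call set_cuda_visible_devices() before the first CUDA call, so the framework only ever sees the selected devices.

# Usage sketch (hypothetical module name gpu_setup.py; PyTorch used only as an example).
from gpu_setup import set_cuda_visible_devices

# Pick at most 2 GPUs with at least 40% free memory and at most 60% utilization,
# before any CUDA context is created.
selected = set_cuda_visible_devices(limit_devices=2, min_free_memory=0.4, max_load=0.6)

import torch  # imported after CUDA_VISIBLE_DEVICES has been set

if selected:
    for gpu in selected:
        print(gpu)
    device = torch.device("cuda")
else:
    device = torch.device("cpu")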
@OleksandrMalinin

@dizcza
Change
os.environ["CUDA_VISIBLE_DEVICES"] = ', '.join(gpus_id)
to
os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(gpus_id)
