@dizcza
Last active May 25, 2020 09:16
Set the CUDA_VISIBLE_DEVICES environment variable from the available GPUs
import subprocess
import os


class GpuInfo(object):
    def __init__(self, index, memory_total, memory_used, gpu_load):
        """
        :param index: GPU index
        :param memory_total: total GPU memory, MiB
        :param memory_used: GPU memory already in use, MiB
        :param gpu_load: GPU utilization load, percent
        """
        self.index = int(index)
        self.memory_total = int(memory_total)
        self.memory_used = int(memory_used)
        try:
            self.gpu_load = int(gpu_load) / 100.
        except ValueError:
            # GPU utilization load is not supported by the current driver
            self.gpu_load = 0.

    def __repr__(self):
        return "GPU #{}: memory total={} MiB, used={} MiB ({:.1f} %), gpu.load={}".format(
            self.index, self.memory_total, self.memory_used,
            100. * self.memory_used / self.memory_total, self.gpu_load)

    def get_available_memory_portion(self):
        return (self.memory_total - self.memory_used) / self.memory_total


class NvidiaSmi(object):
    def __init__(self):
        command = "nvidia-smi --query-gpu=index,memory.total,memory.used,utilization.gpu --format=csv,noheader,nounits".split()
        self.gpus = []
        try:
            process = subprocess.Popen(command,
                                       universal_newlines=True,
                                       stdout=subprocess.PIPE)
            stdout, stderr_ignored = process.communicate()
            for line in stdout.splitlines():
                index, memory_total, memory_used, gpu_load = line.split(', ')
                gpu = GpuInfo(index, memory_total, memory_used, gpu_load)
                self.gpus.append(gpu)
        except FileNotFoundError:
            # No GPU is detected. Try running `nvidia-smi` in a terminal.
            pass

    def get_gpus(self, min_free_memory=0., max_load=1.):
        """
        :param min_free_memory: keep GPUs whose free memory portion is no less than this value, between 0 and 1
        :param max_load: max GPU utilization load, between 0 and 1
        :return: list of available GpuInfo's
        """
        gpus = [gpu for gpu in self.gpus
                if gpu.get_available_memory_portion() >= min_free_memory
                and gpu.gpu_load <= max_load]
        return gpus


def set_cuda_visible_devices(limit_devices=int(1e9), min_free_memory=0.4, max_load=0.6) -> list:
    """
    Automatically sets the CUDA_VISIBLE_DEVICES env variable to the first `limit_devices`
    available GPUs with the largest portion of free memory.

    :param limit_devices: limit the number of GPU devices to use
    :param min_free_memory: keep GPUs whose free memory portion is no less than this value, between 0 and 1
    :param max_load: max GPU utilization load, between 0 and 1
    :return: list of selected GpuInfo's
    """
    gpus = NvidiaSmi().get_gpus(min_free_memory, max_load)
    gpus.sort(key=lambda gpu: gpu.get_available_memory_portion(), reverse=True)
    limit_devices = min(limit_devices, len(gpus))
    gpus = gpus[:limit_devices]
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(gpu.index) for gpu in gpus)
    print("'CUDA_VISIBLE_DEVICES' is set to '{}'".format(os.environ["CUDA_VISIBLE_DEVICES"]))
    return gpus
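
A minimal usage sketch follows. It assumes the snippet above is saved as a module named gpu_setup.py and that PyTorch is installed; both the module name and the PyTorch calls are illustrative, and any CUDA-aware framework works the same way. The key point is to call set_cuda_visible_devices() before the first CUDA call, so the framework only ever sees the selected devices.

# Usage sketch (hypothetical module name gpu_setup.py; PyTorch used only as an example).
from gpu_setup import set_cuda_visible_devices

# Pick at most 2 GPUs with at least 40% free memory and at most 60% utilization,
# before any CUDA context is created.
selected = set_cuda_visible_devices(limit_devices=2, min_free_memory=0.4, max_load=0.6)

import torch  # imported after CUDA_VISIBLE_DEVICES has been set

if selected:
    for gpu in selected:
        print(gpu)
    device = torch.device("cuda")
else:
    device = torch.device("cpu")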
@OleksandrMalinin

@dizcza
Change
os.environ["CUDA_VISIBLE_DEVICES"] = ', '.join(gpus_id)
to
os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(gpus_id)
