Last active
April 1, 2024 23:55
-
-
Save rhee-elten/223124ba106e7b9927078a4a1a8c320e to your computer and use it in GitHub Desktop.
cuda_select_best_memfree.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
## cuda_select_best_memfree.py should ideally also run under Python 2 (so that the system Python can be used).
## gist https://gist.github.com/rhee-elten/223124ba106e7b9927078a4a1a8c320e | |
from __future__ import print_function | |
import sys | |
import os | |
import subprocess | |
# True when running under Python 3 or newer.
_is_python3 = sys.version_info[0] >= 3

if not _is_python3:
    # Python 2 raises OSError when the binary is not found, so alias the
    # Python 3 exception name to keep the ``except`` clauses version-agnostic.
    FileNotFoundError = OSError
    # Python 2's Popen has no ``encoding`` argument; use it unchanged.
    sub_popen = subprocess.Popen
else:

    def sub_popen(*args, **kwargs):
        """Start ``subprocess.Popen`` with its pipes decoded as UTF-8 text."""
        return subprocess.Popen(*args, encoding="utf-8", **kwargs)
def _printenv(varname):
    """Print a shell ``export`` statement reflecting ``varname``'s current state.

    When the variable is present in ``os.environ`` the line is
    ``export NAME=value``; otherwise it is ``export -n NAME``.

    Args:
        varname: name of the environment variable to report.
    """
    try:
        value = os.environ[varname]
    except KeyError:
        # Variable is not set in this process.
        line = "export -n {:s}".format(varname)
    else:
        line = "export {:s}={}".format(varname, value)
    print(line)
def _parse_smi_csv(text):
    """Convert ``nvidia-smi --format=csv`` output into sorted GPU tuples.

    Expected input (first line is the CSV header and is discarded):

        index, memory.free [MiB], pstate, utilization.gpu [%]
        0, 10382 MiB, P8, 2 %
        1, 10988 MiB, P8, 0 %

    Args:
        text: complete stdout of the ``nvidia-smi`` query.

    Returns:
        List of ``(index, free_mib, pstate, utilization)`` tuples sorted by
        free memory, largest first. ``index`` and ``free_mib`` are ints;
        ``pstate`` and ``utilization`` are strings (utilization without its
        trailing " %").

    Raises:
        ValueError, IndexError: if a data row does not have the shape above.
    """
    # Blank lines are skipped so stray empty output cannot break the parse.
    rows = [
        line.strip().split(",") for line in text.splitlines() if line.strip()
    ]
    gpus = [
        (
            int(cols[0]),
            int(cols[1].strip()[:-4]),  # drop the trailing " MiB"
            cols[2].strip(),
            cols[3].strip()[:-2],  # drop the trailing " %"
        )
        for cols in rows[1:]  # rows[0] is the header
    ]
    # Negated key: descending by free memory; ties keep nvidia-smi's order.
    return sorted(gpus, key=lambda gpu: -gpu[1])


def cuda_memfree():
    """Query ``nvidia-smi`` and list the GPUs ordered by free memory.

    Each known ``nvidia-smi`` location is tried in turn. A location whose
    binary does not exist is skipped silently; any other failure (including
    unparseable output) is reported on stderr and the next location is tried.

    Returns:
        List of ``(index, free_mib, pstate, utilization)`` tuples sorted by
        free memory, largest first, or ``[]`` when no location produced a
        usable answer.
    """
    # note: pstate: P0 (maximum performance) to P12 (minimum performance)
    query_args = [
        "--query-gpu=index,memory.free,pstate,utilization.gpu",
        "--format=csv",
    ]
    for smi_bin in [
        "nvidia-smi",
        "C:/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi",
    ]:
        try:
            proc = sub_popen([smi_bin] + query_args, stdout=subprocess.PIPE)
            # communicate() drains stdout AND waits for the child, so the
            # process is reaped instead of being left behind as a zombie.
            output, _ = proc.communicate()
            return _parse_smi_csv(output)
        except FileNotFoundError:
            # Binary not present at this location; try the next one.
            continue
        except Exception as ex:
            # Best effort: report and fall through to the next location.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            print(repr(ex), file=sys.stderr)
    return []
def cuda_best_memfree(num_gpus=1):
    """Pick the ``num_gpus`` GPUs that currently have the most free memory.

    Args:
        num_gpus: how many entries to take from the front of the list
            returned by ``cuda_memfree()``.

    Returns:
        ``(devices, memfree)`` where ``devices`` is a comma-separated string
        of GPU indices, or ``None`` when ``cuda_memfree()`` returned nothing,
        and ``memfree`` is the full list from ``cuda_memfree()``.
    """
    memfree = cuda_memfree()
    if not memfree:
        # No GPU information available.
        return None, memfree
    chosen = memfree[:num_gpus]
    devices = ",".join(str(gpu[0]) for gpu in chosen)
    return devices, memfree
def cuda_select_best_memfree(num_gpus=1):
    """Export environment settings that select the GPUs with most free memory.

    ``TF_FORCE_GPU_ALLOW_GROWTH`` is always set to ``"true"``. When
    ``cuda_best_memfree()`` finds devices, ``CUDA_DEVICE_ORDER`` is set to
    ``"PCI_BUS_ID"`` and ``CUDA_VISIBLE_DEVICES`` to the chosen indices.
    The resulting state of all three variables is then printed as shell
    ``export`` lines via ``_printenv``.

    Args:
        num_gpus: number of GPUs to select.

    Returns:
        ``(visible_devices, memfree)``: the value of ``CUDA_VISIBLE_DEVICES``
        after the call (``None`` if unset; a value already present in the
        environment is returned unchanged when no device was selected) and
        the list reported by ``cuda_best_memfree()``.
    """
    os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"  # make sure

    devices, memfree = cuda_best_memfree(num_gpus=num_gpus)
    if devices is not None:
        # NOTE(review): PCI_BUS_ID ordering is presumably wanted so CUDA's
        # numbering matches the indices reported by nvidia-smi -- confirm.
        selection = (
            ("CUDA_DEVICE_ORDER", "PCI_BUS_ID"),  # make sure
            ("CUDA_VISIBLE_DEVICES", devices),
        )
        for key, value in selection:
            os.environ[key] = value

    for name in (
        "TF_FORCE_GPU_ALLOW_GROWTH",
        "CUDA_DEVICE_ORDER",
        "CUDA_VISIBLE_DEVICES",
    ):
        _printenv(name)

    return os.environ.get("CUDA_VISIBLE_DEVICES"), memfree
# %% | |
if __name__ == "__main__":
    # Command-line entry point: prints shell ``export`` lines for the
    # selected GPUs; with -v/--verbose the raw GPU list is printed first.
    import argparse

    cli = argparse.ArgumentParser()
    # Optional positional argument: number of GPUs to select.
    cli.add_argument("num_gpus", type=int, nargs="?", default="1")
    cli.add_argument("-v", "--verbose", action="store_true")
    options = cli.parse_args()

    if options.verbose:
        print(cuda_memfree())

    cuda_select_best_memfree(options.num_gpus)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment