# Set up a lab environment using JupyterLab + NVIDIA TAO + Triton on an A100 with MIG.
# https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html
# Inspect the available GPU-instance profiles, then carve the GPU into seven
# identical instances. Profile 19 is expected to be the smallest slice that
# fits 7x on an A100 (1g.5gb) -- confirm against the -lgip listing before use.
sudo nvidia-smi mig -lgip # -lgip: list gpu instance profiles
sudo nvidia-smi mig -cgi 19,19,19,19,19,19,19 -C # -cgi: create gpu instance, -C: compute instance
# Base tooling: NVIDIA's data-science-stack installs driver/Docker/conda pieces.
sudo apt install -y git
git clone https://github.com/NVIDIA/data-science-stack
# Abort if the clone failed; otherwise the installer lines below would run
# against whatever directory we happen to be in.
cd data-science-stack || exit 1
./data-science-stack install-base
./data-science-stack install-docker
./data-science-stack install-miniconda
cd ..
# https://docs.nvidia.com/tao/tao-toolkit/text/tao_toolkit_quick_start_guide.html#installing-tao-toolkit
# -y keeps conda non-interactive; without it the script hangs on a prompt.
conda create -y -n launcher python=3.6
# 'conda activate' only works in a non-interactive script after loading the
# shell hook (conda init is for interactive shells).
eval "$(conda shell.bash hook)"
conda activate launcher
# Quote 'tritonclient[all]' so the brackets are not expanded as a shell glob
# if a matching file happens to exist in the current directory.
pip install nvidia-tao jupyterlab numpy matplotlib 'tritonclient[all]'
pip install --extra-index-url https://developer.download.nvidia.com/compute/redist --upgrade nvidia-dali-cuda110
# NGC CLI: download, unpack, verify integrity, and put it on PATH.
wget --content-disposition https://ngc.nvidia.com/downloads/ngccli_linux.zip && unzip ngccli_linux.zip && chmod u+x ngc-cli/ngc
# Stop here if any checksum mismatches -- previously a failed verification was
# silently ignored and the (possibly corrupt) binary used anyway.
find ngc-cli/ -type f -exec md5sum {} + | LC_ALL=C sort | md5sum -c ngc-cli.md5 || { echo "NGC CLI checksum verification failed" >&2; exit 1; }
# Persist ngc on PATH for future shells, and source it so this run sees it too.
echo "export PATH=\"\$PATH:$(pwd)/ngc-cli\"" >> ~/.bash_profile && source ~/.bash_profile
# Pre-pull the container images used later so the per-instance loop is fast.
for image in \
  nvcr.io/nvidia/tritonserver:22.06-py3 \
  nvcr.io/nvidia/tao/tao-toolkit-tf:v3.22.05-tf1.15.5-py3; do
  docker pull "$image"
done
# Collect one MIG device UUID per compute instance ("MIG-…" tokens from -L).
# Fixed the nonstandard awk quoting: the program text belongs inside quotes.
UUIDS=$(nvidia-smi -L | grep "UUID: MIG" | awk '{print $6}' | cut -d')' -f1)
PORT=10000
# For each MIG instance, stand up one JupyterLab (on $PORT) and one Triton
# server (HTTP/gRPC/metrics on PORT+100/PORT+200/PORT+300). $UUIDS is left
# unquoted on purpose: word-splitting yields one UUID per iteration.
for i in $UUIDS; do
ROOT=$PWD
# Per-instance working copy of the task; create the models dir first, then
# copy task contents (trailing slash = directory contents) minus data/models.
mkdir -p "$ROOT/$PORT/models"
rsync -a --exclude data --exclude models task/ "$ROOT/$PORT/"
# -sfn makes the data symlink idempotent: re-running the script no longer
# fails because the link already exists.
ln -sfn "$ROOT/task/data" "$ROOT/$PORT/data"
cd "$ROOT/$PORT" || exit 1
CUDA_VISIBLE_DEVICES=$i LOCAL_DIR=$ROOT/$PORT HTTP_PORT=$((PORT+100)) nohup jupyter lab --NotebookApp.token="" --ip=0.0.0.0 --port="$PORT" &
docker run \
--gpus "device=$i" \
--ipc=host --rm -d \
--shm-size=1g \
--ulimit memlock=-1 \
--ulimit stack=67108864 \
-p $((PORT+100)):8000 -p $((PORT+200)):8001 -p $((PORT+300)):8002 \
-v "$ROOT/$PORT/models":/models \
nvcr.io/nvidia/tritonserver:22.06-py3 \
tritonserver \
--model-repository=/models \
--exit-on-error=false \
--model-control-mode=poll \
--repository-poll-secs 30
cd ..
echo "hosting a jupyter lab at $PORT with two Env variables CUDA_VISIBLE_DEVICES=$i and LOCAL_DIR=$ROOT/$PORT"
PORT=$((PORT+1))
done
# Teardown: stop the lab servers, remove per-port scratch dirs, stop Triton.
pkill -f jupyter
# Match only the 5-digit port directories created by the loop (10000-10099);
# the old '100*' glob could also delete unrelated names like '100notes.txt'.
rm -rf 100[0-9][0-9]
# xargs -r skips running 'docker stop' with no arguments when no container
# matches (the old command substitution errored out in that case).
docker ps -q --filter ancestor=nvcr.io/nvidia/tritonserver:22.06-py3 | xargs -r docker stop