Last active
May 26, 2025 19:48
-
-
Save erdaltoprak/cdc1ec4056b81a9da540229dcde3aa0b to your computer and use it in GitHub Desktop.
proxmox-vm-gpu-nvidia-automated-developer-setup.sh
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Automated developer setup for a fresh Ubuntu 24.04 x86_64 VM with an NVIDIA
# GPU: Docker, zsh, pyenv/Python, Node.js (via `n`), NVIDIA drivers + CUDA,
# and the NVIDIA Container Toolkit. Run as: sudo ./setup.sh
#
# Configuration parameters.
# Declared readonly so nothing later in this long script can clobber them.
readonly PYTHON_VERSION="3.11.11"   # installed via pyenv and set as global default
readonly CUDA_VERSION="12.6.3"      # CUDA toolkit release to download
readonly CUDA_DRIVER="560.35.05"    # driver version bundled in the .run installer
readonly SCRIPT_VERSION="1.0.0"
readonly GITHUB_URL="https://gist.github.com/erdaltoprak/cdc1ec4056b81a9da540229dcde3aa0b"
readonly CLEANUP_CUDA="no" # Set to "no" to keep the CUDA installer and "yes" to remove it

# ANSI color escape sequences for terminal output.
# Stored as literal backslash sequences; they are interpreted at print time.
readonly GREEN="\033[0;32m"
readonly YELLOW="\033[1;33m"
readonly RED="\033[0;31m"
readonly BLUE="\033[0;34m"   # currently unused, kept for completeness
readonly CYAN="\033[0;36m"
readonly MAGENTA="\033[0;35m"
readonly BOLD="\033[1m"
readonly RESET="\033[0m"

# Setup logging: every log() call also appends a color-stripped copy here.
readonly LOGFILE="/var/log/server-setup.log"
log() { | |
local color="" | |
local msg="$1" | |
local log_type="${2:-INFO}" | |
case "$log_type" in | |
INFO) color="" ;; | |
SUCCESS) color="$GREEN" ;; | |
WARNING) color="$YELLOW" ;; | |
ERROR) color="$RED" ;; | |
SECTION) color="$CYAN$BOLD" ;; | |
SKIP) color="$MAGENTA" ;; | |
*) color="" ;; | |
esac | |
# Print colorized output to terminal but plain text to log file | |
echo -e "${color}$(date +"%Y-%m-%d %H:%M:%S"): $msg${RESET}" | tee >(sed 's/\x1b\[[0-9;]*m//g' >> "$LOGFILE") | |
} | |
# Thin convenience wrappers around log() — one per message category, so call
# sites read as intent ("log_error ...") rather than positional arguments.
log_success() {
    log "$1" "SUCCESS"
}

log_warning() {
    log "$1" "WARNING"
}

log_error() {
    log "$1" "ERROR"
}

# Section headers are padded with a blank line above and below for readability.
log_section() {
    echo ""
    log "$1" "SECTION"
    echo ""
}

log_skip() {
    log "$1" "SKIP"
}
# Clear the screen and show a banner summarizing everything the script will
# do, then wait for explicit confirmation before touching the system.
clear
# The here-doc delimiter is intentionally unquoted so ${GITHUB_URL} expands
# inside the banner; the rest of the text is literal.
cat << EOF
##############################################################################
SERVER SETUP SCRIPT
${GITHUB_URL}
##############################################
This script is for:
- Fresh installs of Ubuntu 24.04 x86_64
- A relatively modern Nvidia GPU
###############################################
This script will do the following:
- Install Docker (rootful)
- Install zsh
- Install pyenv
- Install Node.js LTS (through n-install)
- Install locales (en_US.UTF-8)
- Install Python (global default 3.11.11)
- Install Nvidia Drivers (cuda_12.6.3_560.35.05)
- Install Nvidia Container Toolkit (rootful)
- Configure Docker to use Nvidia Container Toolkit
- Test Nvidia Container Toolkit with a sample workload
- Clean up marker files after successful setup
###############################################
You will be prompted to reboot your system twice:
- Once after the Nouveau driver is disabled
- Once after the Nvidia drivers and CUDA Toolkit are installed
You will also be prompted to reboot your system after the script completes.
Disclaimer:
- This script is provided under the Apache License, Version 2.0
- The author is not responsible for any damage or loss incurred while using this script
- Use at your own risk
- You should check the script before running it
##############################################################################
Press ENTER to continue or CTRL+C to abort
EOF
# Block until the user presses ENTER (CTRL+C aborts the whole script).
read -r
########################################################
# Launch with sudo ./setup.sh
########################################################
# Check if script is run as root.
# $EUID is 0 only for root. Note: running via `sudo` also sets $SUDO_USER,
# which later sections rely on to locate the invoking user's home directory.
if [ "$EUID" -ne 0 ]; then
    log_error "Please run as root (sudo ./setup.sh)"
    exit 1
fi

# Update and upgrade the base system before installing anything.
log_section "SYSTEM UPDATE"
log "Updating and upgrading..."
apt update
apt upgrade -y
log_success "System updated successfully!"
# Install Docker (rootful) using Docker's official convenience script.
log_section "DOCKER INSTALLATION"
# Check if Docker is already installed (idempotent re-runs skip this).
if command -v docker &> /dev/null; then
    log_skip "Docker already installed. Skipping installation."
else
    log "Installing Docker..."
    # NOTE(review): this downloads and executes a remote script as root.
    # Acceptable for a convenience setup script, but audit get-docker.sh
    # before running in sensitive environments.
    curl -fsSL https://get.docker.com -o get-docker.sh
    sh get-docker.sh
    rm -f get-docker.sh
    log_success "Docker installed successfully!"
fi
# ----------------------------------------------------------------------------
# ZSH INSTALLATION
# Installs zsh, makes it the login shell of the invoking user, and seeds
# their ~/.zshrc with convenience aliases (append is idempotent via a
# marker comment).
# ----------------------------------------------------------------------------
log_section "ZSH INSTALLATION"
# Check if zsh is already installed.
if ! command -v zsh &> /dev/null; then
    log "Installing zsh..."
    apt install zsh -y
    log_success "zsh installed successfully!"
else
    log_skip "zsh already installed. Skipping installation."
fi

# Resolve the target user/home once. Bug fix: when the script is run directly
# as root (not via sudo), $SUDO_USER is empty and the original unquoted
# `chsh -s $(which zsh) $SUDO_USER` silently changed *root's* login shell
# while the config files were written to "/home//.zshrc".
TARGET_USER="${SUDO_USER:-root}"
if [ "$TARGET_USER" = "root" ]; then
    TARGET_HOME="/root"
else
    TARGET_HOME="/home/$TARGET_USER"
fi

# Set zsh as the default login shell for the target user (expansions quoted).
chsh -s "$(which zsh)" "$TARGET_USER"

# Create an empty .zshrc if it doesn't exist so later appends have a target
# file owned by the user rather than root.
if [ ! -f "$TARGET_HOME/.zshrc" ]; then
    log "Creating empty .zshrc file..."
    touch "$TARGET_HOME/.zshrc"
    chown "$TARGET_USER:$TARGET_USER" "$TARGET_HOME/.zshrc"
fi

# Append common aliases exactly once (the "# Common aliases" marker comment
# keeps this idempotent across re-runs of the script).
log "Configuring zsh with common aliases and settings..."
if ! grep -q "# Common aliases" "$TARGET_HOME/.zshrc"; then
    {
        echo '# Common aliases'
        echo "alias c='clear'"
        echo "alias lss='ls -lah --color=auto'"
        echo "alias ls='ls --color=auto'"
        echo "alias rmm='rm -rf'"
        echo "alias edit='nano $TARGET_HOME/.zshrc'"
        echo "alias reload='source $TARGET_HOME/.zshrc'"
    } >> "$TARGET_HOME/.zshrc"
    log_success "Added common aliases to .zshrc"
else
    log_skip "Common aliases already configured in .zshrc. Skipping."
fi
# Fix locale settings: generate en_US.UTF-8 system-wide and export it from
# the user's .zshrc so interactive shells pick it up.
log_section "LOCALE CONFIGURATION"
log "Configuring locale settings..."
apt-get install -y locales
locale-gen en_US.UTF-8
# Add locale settings to .zshrc if not already there (the "# Locale settings"
# marker comment keeps this append idempotent across re-runs).
if ! grep -q "# Locale settings" "/home/$SUDO_USER/.zshrc"; then
    echo '# Locale settings' >> "/home/$SUDO_USER/.zshrc"
    echo 'export LANG=en_US.UTF-8' >> "/home/$SUDO_USER/.zshrc"
    echo 'export LC_ALL=en_US.UTF-8' >> "/home/$SUDO_USER/.zshrc"
    echo 'export LC_CTYPE=en_US.UTF-8' >> "/home/$SUDO_USER/.zshrc"
    log_success "Added locale settings to .zshrc"
else
    log_skip "Locale settings already configured in .zshrc. Skipping."
fi
# ----------------------------------------------------------------------------
# PYENV AND PYTHON INSTALLATION
# pyenv lives in the *user's* home, so every pyenv invocation is run through
# `sudo -u $SUDO_USER bash -c` with PYENV_ROOT/PATH set inline for that one
# command (root's environment does not have pyenv on PATH).
# ----------------------------------------------------------------------------
log_section "PYENV AND PYTHON INSTALLATION"
log "Checking for pyenv..."
# First check if the .pyenv directory exists (cheap proxy for "installed").
if [ -d "/home/$SUDO_USER/.pyenv" ]; then
    log_skip "pyenv directory found. Checking installation..."
    # Check if the pyenv command resolves for the target user.
    if sudo -u $SUDO_USER bash -c "command -v pyenv" &> /dev/null; then
        log "pyenv successfully installed. Checking Python version..."
        # Check if the desired Python version is already installed with pyenv.
        if sudo -u $SUDO_USER bash -c "PYENV_ROOT='/home/$SUDO_USER/.pyenv' PATH='/home/$SUDO_USER/.pyenv/bin:$PATH' pyenv versions | grep -q '$PYTHON_VERSION'"; then
            log_skip "Python $PYTHON_VERSION already installed with pyenv. Skipping Python installation."
        else
            log "Python $PYTHON_VERSION not found. Installing..."
            # Install Python with -s flag to skip if it already exists,
            # then make it the global default in the same user shell.
            sudo -u $SUDO_USER bash -c "PYENV_ROOT='/home/$SUDO_USER/.pyenv' PATH='/home/$SUDO_USER/.pyenv/bin:$PATH' pyenv install -s $PYTHON_VERSION && pyenv global $PYTHON_VERSION"
            log_success "Python $PYTHON_VERSION has been installed and set as global default."
        fi
    else
        # Directory exists but the command doesn't resolve — most likely the
        # shell init lines are missing, so repair .zshrc instead of reinstalling.
        log_warning "pyenv directory exists but command not available. Checking .zshrc configuration..."
        # Ensure pyenv is properly configured in .zshrc.
        if ! grep -q "PYENV_ROOT" "/home/$SUDO_USER/.zshrc"; then
            log "Adding pyenv configuration to .zshrc..."
            echo 'export PYENV_ROOT="$HOME/.pyenv"' >> "/home/$SUDO_USER/.zshrc"
            echo '[[ -d $PYENV_ROOT/bin ]] && export PATH="$PYENV_ROOT/bin:$PATH"' >> "/home/$SUDO_USER/.zshrc"
            echo 'eval "$(pyenv init - zsh)"' >> "/home/$SUDO_USER/.zshrc"
            log_success "pyenv configuration added to .zshrc. Python installation will be skipped."
            log_warning "Please run 'source ~/.zshrc' after this script completes to activate pyenv."
        fi
    fi
else
    # Fresh install path: dependencies (as root), then pyenv + Python (as user).
    log "Installing pyenv..."
    # Install pyenv build dependencies for compiling CPython from source.
    apt-get install -y build-essential libssl-dev zlib1g-dev libbz2-dev \
        libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev \
        xz-utils tk-dev libffi-dev liblzma-dev git
    # Install pyenv as the user (installer script from pyenv.run).
    sudo -u $SUDO_USER bash -c "curl -fsSL https://pyenv.run | bash"
    # Configure pyenv in .zshrc if not already configured.
    if ! grep -q "PYENV_ROOT" "/home/$SUDO_USER/.zshrc"; then
        log "Configuring pyenv in .zshrc..."
        echo 'export PYENV_ROOT="$HOME/.pyenv"' >> "/home/$SUDO_USER/.zshrc"
        echo '[[ -d $PYENV_ROOT/bin ]] && export PATH="$PYENV_ROOT/bin:$PATH"' >> "/home/$SUDO_USER/.zshrc"
        echo 'eval "$(pyenv init - zsh)"' >> "/home/$SUDO_USER/.zshrc"
    fi
    # Source the updated .zshrc to make pyenv available.
    log "Activating pyenv..."
    # We need to set up the environment for the current (root) session too;
    # NOTE(review): the per-command env on the sudo lines below is what
    # actually matters — these exports are belt-and-braces.
    export PYENV_ROOT="/home/$SUDO_USER/.pyenv"
    export PATH="$PYENV_ROOT/bin:$PATH"
    # Install Python and set it as the global default.
    log "Installing Python $PYTHON_VERSION with pyenv..."
    # Must run as the user, not as root; -s skips if already present.
    if sudo -u $SUDO_USER bash -c "PYENV_ROOT='/home/$SUDO_USER/.pyenv' PATH='/home/$SUDO_USER/.pyenv/bin:$PATH' pyenv install -s $PYTHON_VERSION"; then
        log "Setting Python $PYTHON_VERSION as global default..."
        sudo -u $SUDO_USER bash -c "PYENV_ROOT='/home/$SUDO_USER/.pyenv' PATH='/home/$SUDO_USER/.pyenv/bin:$PATH' pyenv global $PYTHON_VERSION"
        log_success "Python $PYTHON_VERSION has been installed and set as the global default."
    else
        # Non-fatal: the rest of the setup does not depend on Python.
        log_error "Failed to install Python $PYTHON_VERSION. Please install it manually after setup."
    fi
fi
# ----------------------------------------------------------------------------
# NODE.JS INSTALLATION via the "n" version manager (installed per-user).
# ----------------------------------------------------------------------------
log_section "NODE.JS INSTALLATION"
log "Checking for Node.js via 'n' version manager..."
if [ -d "/home/$SUDO_USER/n" ]; then
    log_skip "'n' directory found. Checking installation..."
    # Check if the n command resolves for the target user.
    if sudo -u $SUDO_USER bash -c "command -v n" &> /dev/null; then
        log_success "'n' successfully installed. Node.js should be available."
    else
        # Directory present but command missing: repair the PATH wiring in
        # .zshrc rather than reinstalling.
        log_warning "'n' directory exists but command not available. Checking .zshrc configuration..."
        # Ensure n is properly configured in .zshrc.
        if ! grep -q "N_PREFIX" "/home/$SUDO_USER/.zshrc"; then
            log "Adding 'n' configuration to .zshrc..."
            echo 'export N_PREFIX="$HOME/n"' >> "/home/$SUDO_USER/.zshrc"
            echo 'export PATH="$N_PREFIX/bin:$PATH"' >> "/home/$SUDO_USER/.zshrc"
            log_success "'n' configuration added to .zshrc."
        fi
    fi
else
    log "Installing 'n' version manager and Node.js..."
    # Install n as the user; -q makes the installer non-interactive.
    # NOTE(review): this pipes a bit.ly-shortened URL straight into bash —
    # consider pinning the canonical n-install URL before trusting it.
    sudo -u $SUDO_USER bash -c "curl -sL https://bit.ly/n-install | bash -s -- -q"
    # Check if the installation was successful (installer creates ~/n).
    if [ -d "/home/$SUDO_USER/n" ]; then
        log_success "'n' and Node.js installed successfully."
        # Ensure n is configured in .zshrc if the installer didn't do it.
        if ! grep -q "N_PREFIX" "/home/$SUDO_USER/.zshrc"; then
            log "Adding 'n' configuration to .zshrc..."
            echo 'export N_PREFIX="$HOME/n"' >> "/home/$SUDO_USER/.zshrc"
            echo 'export PATH="$N_PREFIX/bin:$PATH"' >> "/home/$SUDO_USER/.zshrc"
        fi
    else
        # Non-fatal: continue with the GPU setup regardless.
        log_error "Failed to install 'n' and Node.js. Please install manually after setup."
    fi
fi
# ============================================================================
# NVIDIA DRIVERS & CUDA INSTALLATION
#
# This section behaves like a small state machine that tolerates being re-run
# across the two required reboots:
#   state 1: Nouveau loaded            -> blacklist it, rebuild initramfs, reboot
#   state 2: Nouveau gone, no driver   -> install driver + CUDA via .run, reboot
#   state 3: driver works, no CUDA     -> install CUDA toolkit only (no --driver)
#   state 4: driver + CUDA present     -> just ensure zshrc paths + toolkit
# Marker files in /tmp record "installed but not yet rebooted" so a premature
# re-run can warn the user. Statement order matters throughout; code is
# unchanged here, comments only.
# ============================================================================
log_section "NVIDIA DRIVERS & CUDA INSTALLATION"
# Check if an NVIDIA GPU is detected (lspci output is also shown to the user).
log "Checking for NVIDIA GPU..."
if ! lspci | grep -i nvidia; then
    log_warning "NVIDIA GPU not detected. Skipping driver installation."
else
    log_success "NVIDIA GPU detected."
    # Check if NVIDIA drivers are already installed AND functional
    # (nvidia-smi must both exist and run successfully).
    if command -v nvidia-smi &> /dev/null && nvidia-smi &> /dev/null; then
        # Post-install state: driver installed earlier but reboot skipped.
        if [ -f "/tmp/.nvidia_driver_installed_needs_reboot" ]; then
            log_warning "NVIDIA drivers were recently installed but system hasn't been rebooted."
            log_warning "It's highly recommended to reboot before continuing."
            echo "Continue anyway? (y/n)"
            read -r answer
            if [ "$answer" != "y" ] && [ "$answer" != "Y" ]; then
                log_error "Exiting. Please reboot and run the script again."
                exit 0
            else
                log_warning "Continuing without reboot (not recommended)..."
                rm -f "/tmp/.nvidia_driver_installed_needs_reboot"
            fi
        fi
        log_skip "NVIDIA drivers are already installed and working properly. Skipping driver installation."
        # Check if CUDA is already installed (either common install prefix).
        if [ -d "/usr/local/cuda" ] || [ -d "/usr/lib/cuda" ]; then
            log_skip "CUDA is also installed. Skipping CUDA installation."
            # Ensure CUDA paths are present in the user's .zshrc.
            log "Checking CUDA paths in .zshrc..."
            if ! grep -q "export PATH=.*cuda" "/home/$SUDO_USER/.zshrc"; then
                log "Adding CUDA to PATH in .zshrc..."
                echo '# CUDA paths' >> "/home/$SUDO_USER/.zshrc"
                echo 'export PATH=$PATH:/usr/local/cuda/bin' >> "/home/$SUDO_USER/.zshrc"
            fi
            if ! grep -q "export LD_LIBRARY_PATH=.*cuda" "/home/$SUDO_USER/.zshrc"; then
                log "Adding CUDA to LD_LIBRARY_PATH in .zshrc..."
                # Only add the section header if the PATH branch above didn't.
                if ! grep -q "# CUDA paths" "/home/$SUDO_USER/.zshrc"; then
                    echo '# CUDA paths' >> "/home/$SUDO_USER/.zshrc"
                fi
                echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64' >> "/home/$SUDO_USER/.zshrc"
            fi
            # Check if the NVIDIA Container Toolkit is installed.
            if dpkg -l | grep -q nvidia-container-toolkit; then
                log_skip "NVIDIA Container Toolkit already installed. Skipping installation."
            else
                log_section "NVIDIA CONTAINER TOOLKIT INSTALLATION"
                log "Installing NVIDIA Container Toolkit..."
                # Configure the production apt repository for the toolkit:
                # import NVIDIA's GPG key, then rewrite the list file so every
                # deb line is signed-by that keyring.
                log "Setting up NVIDIA Container Toolkit repository..."
                curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
                    && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
                    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
                    tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
                # Update package list to pick up the new repository.
                log "Updating package list..."
                apt-get update
                # Install the NVIDIA Container Toolkit packages.
                log "Installing NVIDIA Container Toolkit packages..."
                apt-get install -y nvidia-container-toolkit
                # Register the nvidia runtime in Docker's daemon config.
                log "Configuring Docker to use NVIDIA Container Toolkit..."
                nvidia-ctk runtime configure --runtime=docker
                # Restart Docker so the new runtime takes effect.
                log "Restarting Docker daemon..."
                systemctl restart docker
                log_success "NVIDIA Container Toolkit installation and configuration completed."
            fi
        else
            # State 3: driver works but the CUDA toolkit is missing —
            # install the toolkit only (note: NO --driver flag below).
            log_warning "NVIDIA drivers are working but CUDA is not installed. Installing CUDA..."
            apt-get install linux-headers-$(uname -r) -y
            # Install CUDA.
            log "Installing CUDA Toolkit..."
            cuda_installer="cuda_${CUDA_VERSION}_${CUDA_DRIVER}_linux.run"
            if [ -f "$cuda_installer" ]; then
                log_skip "CUDA installer already exists, skipping download..."
            else
                log "Downloading CUDA installer..."
                if ! wget https://developer.download.nvidia.com/compute/cuda/$CUDA_VERSION/local_installers/$cuda_installer; then
                    log_error "Failed to download CUDA installer. Exiting."
                    exit 1
                fi
            fi
            # Run the installer with appropriate options (without driver).
            log "Running CUDA installer (skipping driver installation)..."
            sh $cuda_installer --silent --toolkit --samples --samplespath="/home/$SUDO_USER/cuda_samples" --no-opengl-libs
            # Add CUDA to PATH and LD_LIBRARY_PATH in zshrc.
            log "Adding CUDA to PATH in .zshrc..."
            echo '# CUDA paths' >> "/home/$SUDO_USER/.zshrc"
            echo 'export PATH=$PATH:/usr/local/cuda/bin' >> "/home/$SUDO_USER/.zshrc"
            echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64' >> "/home/$SUDO_USER/.zshrc"
            # Clean up the CUDA installer if requested via CLEANUP_CUDA.
            if [ "$CLEANUP_CUDA" = "yes" ]; then
                log "Cleaning up CUDA installer..."
                rm -f "$cuda_installer"
            else
                log_skip "Keeping CUDA installer as requested."
            fi
        fi
    else
        # nvidia-smi not working: either pre-install (Nouveau still active)
        # or mid-install (Nouveau disabled, driver not yet installed).
        # Check if Nouveau is already disabled (i.e. we're post-reboot).
        if ! lsmod | grep -q nouveau; then
            # Post-Nouveau-disable state where the reboot was skipped.
            if [ -f "/tmp/.nouveau_disabled_needs_reboot" ]; then
                log_warning "Nouveau driver was recently disabled but system hasn't been rebooted."
                log_warning "It's highly recommended to reboot before continuing with NVIDIA installation."
                echo "Continue anyway? (y/n)"
                read -r answer
                if [ "$answer" != "y" ] && [ "$answer" != "Y" ]; then
                    log_error "Exiting. Please reboot and run the script again."
                    exit 0
                else
                    log_warning "Continuing without reboot (not recommended)..."
                    rm -f "/tmp/.nouveau_disabled_needs_reboot"
                fi
            fi
            log_success "Nouveau driver not loaded. Proceeding with NVIDIA installation..."
            log "Checking for CUDA..."
            # Check if CUDA is already installed.
            if [ -d "/usr/local/cuda" ] || [ -d "/usr/lib/cuda" ]; then
                log_skip "CUDA already installed. Skipping installation."
            else
                log "CUDA not installed. Installing CUDA..."
                apt-get install linux-headers-$(uname -r) -y
                # Install NVIDIA drivers and CUDA from the same .run bundle.
                log "Installing Nvidia Drivers and CUDA Toolkit..."
                cuda_installer="cuda_${CUDA_VERSION}_${CUDA_DRIVER}_linux.run"
                if [ -f "$cuda_installer" ]; then
                    log_skip "CUDA installer already exists, skipping download..."
                else
                    log "Downloading CUDA installer..."
                    if ! wget https://developer.download.nvidia.com/compute/cuda/$CUDA_VERSION/local_installers/$cuda_installer; then
                        log_error "Failed to download CUDA installer. Exiting."
                        exit 1
                    fi
                fi
                # Run the installer with appropriate options
                # (--driver here, unlike the toolkit-only branch above).
                log "Running CUDA installer..."
                sh $cuda_installer --silent --driver --toolkit --samples --samplespath="/home/$SUDO_USER/cuda_samples" --no-opengl-libs
                # Verify the CUDA installation actually produced an install dir.
                if [ ! -d "/usr/local/cuda" ] && [ ! -d "/usr/lib/cuda" ]; then
                    log_error "CUDA installation verification failed. CUDA directory not found."
                    exit 1
                fi
                # Create a marker file to indicate a reboot is now needed.
                touch "/tmp/.nvidia_driver_installed_needs_reboot"
                # Clean up the CUDA installer if requested via CLEANUP_CUDA.
                if [ "$CLEANUP_CUDA" = "yes" ]; then
                    log "Cleaning up CUDA installer..."
                    rm -f "$cuda_installer"
                else
                    log_skip "Keeping CUDA installer as requested."
                fi
            fi
            # Add CUDA to PATH and LD_LIBRARY_PATH in zshrc if not already there.
            log "Checking CUDA paths in .zshrc..."
            if ! grep -q "export PATH=.*cuda" "/home/$SUDO_USER/.zshrc"; then
                log "Adding CUDA to PATH in .zshrc..."
                echo '# CUDA paths' >> "/home/$SUDO_USER/.zshrc"
                echo 'export PATH=$PATH:/usr/local/cuda/bin' >> "/home/$SUDO_USER/.zshrc"
            else
                log_skip "CUDA already in PATH. Skipping."
            fi
            if ! grep -q "export LD_LIBRARY_PATH=.*cuda" "/home/$SUDO_USER/.zshrc"; then
                log "Adding CUDA to LD_LIBRARY_PATH in .zshrc..."
                # Only add the section header if it isn't there already.
                if ! grep -q "# CUDA paths" "/home/$SUDO_USER/.zshrc"; then
                    echo '# CUDA paths' >> "/home/$SUDO_USER/.zshrc"
                fi
                echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64' >> "/home/$SUDO_USER/.zshrc"
            else
                log_skip "CUDA already in LD_LIBRARY_PATH. Skipping."
            fi
            # Install the NVIDIA Container Toolkit if not already installed.
            if dpkg -l | grep -q nvidia-container-toolkit; then
                log_skip "NVIDIA Container Toolkit already installed. Skipping installation."
            else
                log_section "NVIDIA CONTAINER TOOLKIT INSTALLATION"
                log "Installing NVIDIA Container Toolkit..."
                # Configure the production apt repository (same recipe as the
                # toolkit block in the drivers-already-working branch above).
                log "Setting up NVIDIA Container Toolkit repository..."
                curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
                    && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
                    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
                    tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
                # Update package list to pick up the new repository.
                log "Updating package list..."
                apt-get update
                # Install the NVIDIA Container Toolkit packages.
                log "Installing NVIDIA Container Toolkit packages..."
                apt-get install -y nvidia-container-toolkit
                # Register the nvidia runtime in Docker's daemon config.
                log "Configuring Docker to use NVIDIA Container Toolkit..."
                nvidia-ctk runtime configure --runtime=docker
                # Restart Docker so the new runtime takes effect.
                log "Restarting Docker daemon..."
                systemctl restart docker
                log_success "NVIDIA Container Toolkit installation and configuration completed."
            fi
        else
            # State 1: Nouveau is still loaded — disable it and require a reboot.
            log_warning "Nouveau driver is loaded. Preparing to disable it..."
            # Install build tools needed later for the driver's kernel modules.
            apt install build-essential linux-headers-$(uname -r) -y
            # Disable the Nouveau driver via modprobe blacklist.
            log "Disabling Nouveau driver..."
            echo 'blacklist nouveau' > /etc/modprobe.d/blacklist-nouveau.conf
            echo 'options nouveau modeset=0' >> /etc/modprobe.d/blacklist-nouveau.conf
            # Rebuild initramfs so the blacklist applies at next boot.
            update-initramfs -u
            # Marker file: a reboot is needed after disabling Nouveau.
            touch "/tmp/.nouveau_disabled_needs_reboot"
            log_section "REBOOT REQUIRED"
            log_warning "Nouveau driver has been disabled."
            log_warning "You MUST reboot your system before continuing with NVIDIA installation."
            echo "Reboot now? (y/n)"
            read -r answer
            if [ "$answer" = "y" ] || [ "$answer" = "Y" ]; then
                log "Rebooting system now..."
                reboot
            else
                log_warning "Please reboot manually, then run this script again to complete the installation."
                exit 0
            fi
        fi
    fi
fi
# Test the NVIDIA Container Toolkit with a sample workload.
log_section "NVIDIA CONTAINER TOOLKIT TESTING"
# Only attempt the test when a functional driver AND docker are both present
# (this section is silently skipped on the pre-reboot passes of the script).
if command -v nvidia-smi &> /dev/null && nvidia-smi &> /dev/null && command -v docker &> /dev/null; then
    log "Testing NVIDIA Container Toolkit with a sample workload..."
    # Skip if a previous run already passed (marker file in /tmp).
    if [ -f "/tmp/.nvidia_container_test_done" ]; then
        log_skip "NVIDIA Container Toolkit test was already run successfully. Skipping test."
    else
        log "Running a test container with nvidia-smi..."
        # If nvidia-smi works inside a GPU-enabled container, the runtime,
        # toolkit, and driver are all wired up correctly.
        if docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi; then
            log_success "NVIDIA Container Toolkit test successful!"
            # Marker file so re-runs don't pull/run the container again.
            touch /tmp/.nvidia_container_test_done
        else
            # Non-fatal: report and let the user investigate.
            log_error "NVIDIA Container Toolkit test failed. Please check your installation."
        fi
    fi
fi
# Setup finished: remove the reboot-tracking marker files so a future run
# starts from a clean state.
rm -f /tmp/.nvidia_driver_installed_needs_reboot
rm -f /tmp/.nouveau_disabled_needs_reboot

log_section "INSTALLATION COMPLETE"
log_success "Setup completed successfully!"

# Offer an optional final reboot; any answer other than y/Y simply ends here.
echo "A reboot is recommended to complete the installation. Reboot now? (y/n)"
read -r answer
case "$answer" in
    y|Y)
        log "Rebooting system now..."
        reboot
        ;;
esac
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment