Last active
April 13, 2024 17:51
-
-
Save vadimstasiev/d874464a8ecbc4008d330721a0dca385 to your computer and use it in GitHub Desktop.
Proxmox Single GPU Passthrough VFIO binding and unbinding
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
agent: 1
balloon: 0
bios: ovmf
boot: order=hostpci0;virtio1
cores: 24
cpu: host,hidden=1,flags=+pdpe1gb;+hv-tlbflush;+aes
efidisk0: local-zfs:vm-100-disk-0,efitype=4m,size=1M
hookscript: local:snippets/hookscript.sh
hostpci0: 0000:06:00,pcie=1,rombar=0,romfile=RTX3090-OG-Founders.bin
hostpci1: 0000:08:00.4
hostpci2: 0000:08:00.3
ide0: local:iso/virtio-win-0.1.240.iso,media=cdrom,size=612812K
ide2: local:iso/Win11_23H2_EnglishInternational_x64v2.iso,media=cdrom,size=6638374K
machine: pc-q35-8.1
memory: 45000
meta: creation-qemu=8.1.5,ctime=1709928892
net0: e1000=BC:24:11:AE:F3:9B,bridge=vmbr0
numa: 0
ostype: win11
scsihw: virtio-scsi-single
smbios1: uuid=9658f376-b663-485d-9bd2-7fb1b3c10976
sockets: 1
tpmstate0: local-zfs:vm-100-disk-2,size=4M,version=v2.0
vga: none
virtio0: /dev/sdb,aio=threads,backup=0,size=976762584K
virtio1: local-zfs:vm-100-disk-3,iothread=1,size=300G
vmgenid: 33c5c951-af72-454d-9b9e-9ae7bccac5fb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Proxmox VM start hook script for VM ID 100 with GPU, GPU audio, and USB controller passthrough
#
# The script compares its first argument with VMID and its second argument
# with the phase names "pre-start" and "post-start".

# VM this hook applies to; invocations for any other VM ID are ignored.
readonly VMID="100"
# CPU list passed to taskset in the post-start phase.
readonly CPUS="0-23"
# PCI addresses of the devices that are handed to the VFIO driver.
readonly GPU_PCI="0000:06:00.0"
readonly GPU_AUDIO_PCI="0000:06:00.1"
readonly USB_CONTROLLER_PCI="0000:08:00.3"
readonly AUDIO_DAC="0000:08:00.4"
# Kernel driver the devices are bound to while the VM owns them.
readonly VFIO_DRIVER="vfio-pci"
# Marker file that exists while the pre-start phase is rebinding devices.
readonly LOCK_FILE="/tmp/binding_vfio.lock"
#######################################
# Remove a PCI device from the bus and trigger a bus rescan.
# Arguments:
#   $1 - PCI address of the device (e.g. 0000:08:00.3)
#   $2 - optional PCI sysfs root, defaults to /sys/bus/pci
# Returns:
#   Status of the final write to the rescan node.
#######################################
force_disconnect_device() {
  local pci_id=$1
  local sysfs_pci=${2:-/sys/bus/pci}
  local remove_node="${sysfs_pci}/devices/${pci_id}/remove"

  if [[ -n "$pci_id" && -e "$remove_node" ]]; then
    echo 1 > "$remove_node"
  else
    # Nothing to remove; the rescan below is still issued, as before.
    echo "force_disconnect_device: ${pci_id:-<empty>} not present, rescanning only" >&2
  fi
  echo 1 > "${sysfs_pci}/rescan"
}
#######################################
# Hand one PCI device over to the VFIO driver: unbind it from its current
# driver (if any), set the driver override and bind it to VFIO_DRIVER.
# Globals:
#   VFIO_DRIVER (read)
# Arguments:
#   $1 - PCI address of the device
#   $2 - optional number of seconds to pause after unbinding (default 0)
# Returns:
#   Status of the final bind write, or 0 if already bound to VFIO_DRIVER.
#######################################
bind_to_vfio() {
  local pci_id=$1
  local settle=${2:-0}
  local dev_dir="/sys/bus/pci/devices/${pci_id}"
  local current=""

  if [[ -L "${dev_dir}/driver" ]]; then
    current=$(readlink "${dev_dir}/driver")
    current=${current##*/}
  fi

  # Already owned by VFIO: nothing to do.
  if [[ "$current" == "$VFIO_DRIVER" ]]; then
    return 0
  fi

  # Only unbind when a driver is attached; otherwise driver/unbind does not exist.
  if [[ -n "$current" ]]; then
    echo "$pci_id" > "${dev_dir}/driver/unbind"
    if (( settle > 0 )); then
      sleep "$settle"
    fi
  fi

  echo "$VFIO_DRIVER" > "${dev_dir}/driver_override"
  echo "$pci_id" > "/sys/bus/pci/drivers/${VFIO_DRIVER}/bind"
}

# Dispatch on the hook phase, but only for the configured VM.
if [[ "${1:-}" == "$VMID" ]]; then
  case "${2:-}" in
    pre-start)
      # The lock file marks the window in which devices are being moved to VFIO.
      # The trap removes it even if the hook is interrupted part-way through.
      # NOTE(review): the lock is released at the end of pre-start; whether the
      # VM already reports "running" at that point is not visible here.
      touch "$LOCK_FILE"
      trap 'rm -f -- "$LOCK_FILE"' EXIT

      modprobe "$VFIO_DRIVER"
      systemctl stop gdm3
      sleep 2

      # GPU first, with a short pause after it is released from its driver.
      bind_to_vfio "$GPU_PCI" 2
      # echo 14 > /sys/bus/pci/devices/$GPU_PCI/resource1_resize
      bind_to_vfio "$GPU_AUDIO_PCI"

      # The USB controller is removed and re-enumerated before it is rebound.
      force_disconnect_device "$USB_CONTROLLER_PCI"
      bind_to_vfio "$USB_CONTROLLER_PCI"

      bind_to_vfio "$AUDIO_DAC"

      rm -f -- "$LOCK_FILE"
      trap - EXIT
      ;;
    post-start)
      # Ensure PID file is available for reading, but do not wait forever.
      pid_file="/run/qemu-server/${1}.pid"
      waited=0
      while [[ ! -f "$pid_file" ]]; do
        if (( waited >= 30 )); then
          echo "Timed out waiting for ${pid_file}; CPU affinity not applied." >&2
          break
        fi
        sleep 1
        waited=$((waited + 1))
      done

      if [[ -f "$pid_file" ]]; then
        PID=$(<"$pid_file")
        # -a applies the affinity to every thread of the process, not only
        # the thread whose ID equals the PID.
        taskset -acp "$CPUS" "$PID"
      fi
      ;;
  esac
fi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Polls the state of one Proxmox VM and runs the rebind script while the VM
# is not running and no lock file is present.

# Enable/disable debug mode
DEBUG_MODE=true
# VM ID to monitor
VMID=100
# Path to your rebind script
REBIND_SCRIPT="/home/srv01/Scripts/rebind-script.sh"
# Define the LOCK_FILE (rebinding is skipped while this file exists)
LOCK_FILE="/tmp/binding_vfio.lock"
# Last known state of the VM: "unknown", "running" or "stopped"
last_state="unknown"
#######################################
# Print a debug message when debug mode is enabled.
# Globals:
#   DEBUG_MODE (read) - messages are printed only when it equals "true"
# Arguments:
#   $1 - message text
# Outputs:
#   "[DEBUG] <message>" on stdout when enabled, nothing otherwise
#######################################
debug_log() {
  if [[ "${DEBUG_MODE:-false}" == true ]]; then
    printf '[DEBUG] %s\n' "${1:-}"
  fi
}
#######################################
# Check whether the monitored VM is running.
# Globals:
#   VMID (read)
# Returns:
#   0 when the output of `qm status` contains "running", non-zero otherwise
#######################################
is_vm_running() {
  # Kept local so the status does not leak into the caller's scope.
  local result=0

  qm status "$VMID" | grep -q "running" || result=$?
  debug_log "is_vm_running: qm status $VMID result: $result"
  return "$result"
}
#######################################
# Run the rebind script with bash.
# Globals:
#   REBIND_SCRIPT (read)
# Returns:
#   1 when the script is missing or unreadable, otherwise the script's status
#######################################
rebind_devices() {
  debug_log "Rebinding devices with script: $REBIND_SCRIPT"
  if [[ ! -r "$REBIND_SCRIPT" ]]; then
    echo "Rebind script not found or unreadable: $REBIND_SCRIPT" >&2
    return 1
  fi
  bash "$REBIND_SCRIPT"
}
# Main loop: poll the VM state every 5 seconds. State changes are announced
# once; while the VM is not running and no lock file exists, the rebind
# script is invoked on every pass.
while true; do
  if is_vm_running; then
    if [[ "$last_state" != "running" ]]; then
      echo "VM $VMID is running."
      last_state="running"
      debug_log "State changed to running"
    else
      debug_log "VM $VMID is still running."
    fi
  else
    if [[ "$last_state" != "stopped" ]]; then
      echo "VM $VMID has stopped."
      last_state="stopped"
      debug_log "State changed to stopped"
    fi

    if [[ -f "$LOCK_FILE" ]]; then
      # The lock file exists, so rebinding is skipped on this pass.
      debug_log "Lock file found: $LOCK_FILE"
    else
      debug_log "Lock file not found, proceeding to rebind devices."
      # Report a failing rebind instead of dropping its status silently.
      rebind_devices || echo "Rebind script failed with status $?." >&2
    fi
  fi

  sleep 5 # Wait for 5 seconds before checking again
done
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Returns PCI devices from vfio-pci to their regular host drivers.

# NOTE(review): LOCK_FILE is defined but not referenced in the visible part
# of this script.
LOCK_FILE="/tmp/binding_vfio.lock"

# Define device IDs and their expected drivers
# (key: PCI address, value: name of the driver directory under
# /sys/bus/pci/drivers the device should end up bound to)
declare -A device_driver_map=(
  ["0000:06:00.0"]="nvidia"
  ["0000:06:00.1"]="snd_hda_intel"
  ["0000:08:00.3"]="xhci_hcd"
  ["0000:08:00.4"]="snd_hda_intel"
)
#######################################
# Print the vendor and device ID of a PCI device.
# Arguments:
#   $1 - PCI address of the device
#   $2 - optional PCI sysfs root, defaults to /sys/bus/pci
# Outputs:
#   "<vendor> <device>" on stdout, lowercase, without the leading "0x"
# Returns:
#   0 on success, non-zero when the ID files cannot be read
#######################################
get_pci_vendor_device_id() {
  local pci_id=$1
  local sysfs_pci=${2:-/sys/bus/pci}
  local dev_dir="${sysfs_pci}/devices/${pci_id}"
  local vendor_id device_id

  if [[ -z "$pci_id" || ! -r "${dev_dir}/vendor" || ! -r "${dev_dir}/device" ]]; then
    echo "get_pci_vendor_device_id: cannot read IDs for '${pci_id}'" >&2
    return 1
  fi

  # Declared separately above so a failed read is not masked by `local`.
  vendor_id=$(<"${dev_dir}/vendor") || return
  device_id=$(<"${dev_dir}/device") || return

  # Strip leading '0x' from vendor/device ID
  vendor_id=${vendor_id#0x}
  device_id=${device_id#0x}

  # Ensure lowercase for consistency
  printf '%s %s\n' "${vendor_id,,}" "${device_id,,}"
}
#######################################
# Check whether a PCI device is currently bound to vfio-pci.
# Arguments:
#   $1 - PCI address of the device
#   $2 - optional PCI sysfs root, defaults to /sys/bus/pci
# Returns:
#   0 when the device's driver link points at vfio-pci, non-zero otherwise
#   (including when the device has no driver link at all)
#######################################
is_bound_to_vfio() {
  local pci_id=$1
  local sysfs_pci=${2:-/sys/bus/pci}
  local driver_link

  driver_link=$(readlink "${sysfs_pci}/devices/${pci_id}/driver" 2>/dev/null) || return 1
  # Compare the last path component exactly instead of a loose suffix match.
  [[ "${driver_link##*/}" == "vfio-pci" ]]
}
#######################################
# Unbind a device from vfio-pci and clear its driver override.
# Arguments:
#   $1 - PCI address of the device
#   $2 - optional PCI sysfs root, defaults to /sys/bus/pci
# Returns:
#   0 when the device was bound to vfio-pci and has been released,
#   1 when it was not bound to vfio-pci (nothing is changed)
#######################################
unbind_vfio_and_clear_override() {
  local pci_id=$1
  local sysfs_pci=${2:-/sys/bus/pci}
  local -r max_wait=10
  local waited=0

  if ! is_bound_to_vfio "$pci_id" "$sysfs_pci"; then
    return 1
  fi

  echo "$pci_id" > "${sysfs_pci}/drivers/vfio-pci/unbind"
  # Writing a bare newline clears the override.
  echo > "${sysfs_pci}/devices/${pci_id}/driver_override"

  # Ensure the device is fully released before continuing. The wait is
  # bounded so a lingering open file cannot stall the script forever, and
  # the address is matched as a fixed string (its '.' is not a wildcard).
  while lsof 2>/dev/null | grep -qF -- "$pci_id"; do
    if (( waited >= max_wait )); then
      echo "Timeout waiting for $pci_id to be released; continuing." >&2
      break
    fi
    sleep 1
    waited=$((waited + 1))
  done
  return 0
}
#######################################
# Unload a kernel module and load it again.
# Arguments:
#   $1 - module name
# Returns:
#   1 when no module name is given, otherwise the status of the final modprobe
#######################################
reload_module() {
  local module=$1

  if [[ -z "$module" ]]; then
    return 1
  fi

  # Error output of both modprobe calls is discarded, as in the original.
  modprobe -r "$module" 2>/dev/null
  # Wait a bit to ensure the module is fully unloaded
  sleep 2
  modprobe "$module" 2>/dev/null
}
#######################################
# Bind one PCI device to its expected host driver.
# If the device is already bound to that driver, nothing is done. Otherwise
# the function waits (up to 10 seconds) for the driver directory and the
# device node to exist and then writes the address to the driver's bind file.
# Arguments:
#   $1 - PCI address of the device
#   $2 - driver name (directory under /sys/bus/pci/drivers)
# Returns:
#   0 when the device ends up bound to the driver, 1 otherwise
#######################################
bind_to_expected_driver() {
  local pci_id=$1
  local driver=$2
  local driver_dir="/sys/bus/pci/drivers/${driver}"
  local -r timeout=10 # Timeout after 10 seconds
  local counter=0

  # The per-driver device link exists only once the device is bound, so its
  # presence means there is nothing left to do.
  if [[ -e "${driver_dir}/${pci_id}" ]]; then
    return 0
  fi

  # Wait until the driver is registered and the device is present.
  while [[ ! -d "$driver_dir" || ! -e "/sys/bus/pci/devices/${pci_id}" ]]; do
    if (( counter >= timeout )); then
      echo "Timeout waiting for $pci_id to be ready for binding to $driver."
      echo "Skipping binding for $pci_id to $driver due to timeout."
      return 1
    fi
    sleep 1
    counter=$((counter + 1))
  done

  # The driver may have claimed the device in the meantime, which makes the
  # write fail; only report an error if the device is still unbound.
  if ! { echo "$pci_id" > "${driver_dir}/bind"; } 2>/dev/null; then
    if [[ ! -e "${driver_dir}/${pci_id}" ]]; then
      echo "Failed to bind $pci_id to $driver." >&2
      return 1
    fi
  fi
  return 0
}

# Attempt to unbind devices from VFIO and clear driver overrides
device_rebound=false
for pci_id in "${!device_driver_map[@]}"; do
  if unbind_vfio_and_clear_override "$pci_id"; then
    device_rebound=true
  fi
done

# Check if any device was rebound before proceeding
if [[ "$device_rebound" == true ]]; then
  # Ensure all NVIDIA related modules are properly handled
  reload_module "nvidia_drm"
  reload_module "nvidia_modeset"
  reload_module "nvidia_uvm"
  reload_module "nvidia"
  reload_module "snd_hda_intel"

  # Wait a bit to ensure modules are fully loaded and devices are ready
  sleep 5

  # Try to bind the device to the NVIDIA driver (best effort: failures of
  # these writes are ignored, as in the original).
  for pci_id in "${!device_driver_map[@]}"; do
    if [[ "${device_driver_map[$pci_id]}" == "nvidia" ]]; then
      { printf '%s' "$pci_id" > /sys/bus/pci/drivers/nvidia/bind; } 2>/dev/null || true
      # After binding, attempt to dynamically set new_id if necessary
      if new_id=$(get_pci_vendor_device_id "$pci_id"); then
        { echo "$new_id" > /sys/bus/pci/drivers/nvidia/new_id; } 2>/dev/null || true
      fi
    fi
  done

  # Rebind devices to their original drivers
  for pci_id in "${!device_driver_map[@]}"; do
    bind_to_expected_driver "$pci_id" "${device_driver_map[$pci_id]}"
  done

  # Get the PCI ID of the USB controller
  usb_controller_id=""
  for pci_id in "${!device_driver_map[@]}"; do
    if [[ "${device_driver_map[$pci_id]}" == "xhci_hcd" ]]; then
      usb_controller_id=$pci_id
      break
    fi
  done

  # Check if USB controller ID is found
  if [[ -n "$usb_controller_id" ]]; then
    # Rescan USB bus: remove the controller and let a PCI rescan re-add it
    echo 1 > "/sys/bus/pci/devices/${usb_controller_id}/remove"
    echo 1 > /sys/bus/pci/rescan
  else
    echo "USB controller not found in device map. Skipping USB bus rescan."
  fi

  # Restart display manager to apply changes
  systemctl restart gdm3
  echo "Devices have been rebound to their original drivers."
else
  echo "No VFIO-bound devices needed rebinding."
fi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[Unit]
Description=Continuous Rebind Devices Monitoring for VM 100
[Service]
Type=simple
ExecStart=/home/srv01/Scripts/monitor-vm-100.sh
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment