Proxmox k3s create script #proxmox #shell #scripts
#!/usr/bin/env bash
# ====================================================
# K3S VM Provisioning Script
# ====================================================
#
# This script automates the provisioning of K3s VMs on a Proxmox VE cluster.
# It performs the following operations:
#   1. Creates server, agent (worker), and storage VMs by cloning a template VM
#   2. Configures resources (RAM, CPU, disk) for each VM type
#   3. Migrates VMs to the specified Proxmox nodes for distributed deployment
#
# Requirements:
#   - Proxmox VE with the qm and pvesh commands available
#   - jq installed for JSON parsing
#   - Template VM with ID 1000 must exist
#   - Sufficient resources on the target Proxmox nodes
#   - Valid config.json file with VM definitions
#
# Usage: ./create_vms.sh --config <config.json> [--cleanup] [--vmid <id1,id2,...>]
#        [--disable-auto-start] [--nocloud-iso <iso-file>]
#
# Output: Creates a log file (vm_creation_output.log) with detailed operation logs
# ====================================================
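# Example config.json (illustrative only; the field names are inferred from the
# jq queries used below -- .nodes[].vmid, .role, and .node -- with roles
# "server", "agent", and "storage". Node names other than pve1 are hypothetical):
#
# {
#   "nodes": [
#     { "vmid": 101, "role": "server",  "node": "pve1" },
#     { "vmid": 102, "role": "agent",   "node": "pve2" },
#     { "vmid": 103, "role": "storage", "node": "pve3" }
#   ]
# }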
# Function to display usage information
usage() {
    echo "Usage: $0 --config <config.json> [--cleanup] [--vmid <id1,id2,...>] [--disable-auto-start] [--nocloud-iso <iso-file>]"
    echo ""
    echo "  --config              Path to the JSON configuration file containing VM definitions"
    echo "  --cleanup             Remove stray VM configuration files if they exist but the VM is not registered"
    echo "  --vmid                Comma-separated list of VM IDs to process (optional, defaults to all VMs in config)"
    echo "  --disable-auto-start  Disable automatic start on boot for VMs (default: auto-start enabled)"
    echo "  --nocloud-iso         ISO file name from NFS storage to use instead of the cloud-init CD-ROM"
    echo ""
    echo "Example:"
    echo "  $0 --config ./config.json"
    echo "  $0 --config ./config.json --cleanup"
    echo "  $0 --config ./config.json --vmid 101,102,103"
    echo "  $0 --config ./config.json --disable-auto-start"
    echo "  $0 --config ./config.json --nocloud-iso nocloud-amd64.iso"
    exit 1
}
# Initialize variables to prevent unbound variable errors
CONFIG_FILE=""
CLEANUP=""
VM_IDS=""
DISABLE_AUTO_START=""
NOCLOUD_ISO=""

# Parse and validate command-line arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --config)
            if [[ -z "$2" ]]; then
                echo "Error: --config flag requires a value" >&2
                exit 1
            fi
            CONFIG_FILE="$2"
            shift 2
            ;;
        --cleanup)
            CLEANUP="cleanup"
            shift
            ;;
        --vmid)
            if [[ -z "$2" ]]; then
                echo "Error: --vmid flag requires a comma-separated list of VM IDs" >&2
                exit 1
            fi
            VM_IDS="$2"
            shift 2
            ;;
        --disable-auto-start)
            DISABLE_AUTO_START="true"
            shift
            ;;
        --nocloud-iso)
            if [[ -z "$2" ]]; then
                echo "Error: --nocloud-iso flag requires an ISO filename" >&2
                exit 1
            fi
            NOCLOUD_ISO="$2"
            shift 2
            ;;
        -h|--help)
            usage
            ;;
        *)
            echo "Unknown option: $1" >&2
            usage
            ;;
    esac
done

# Validate required arguments
if [[ -z "$CONFIG_FILE" ]]; then
    echo "Error: --config flag is required" >&2
    usage
fi

if [[ ! -f "$CONFIG_FILE" ]]; then
    echo "Error: Config file '$CONFIG_FILE' does not exist" >&2
    exit 1
fi
# Enable strict error handling only now, after argument validation, so missing
# arguments produce the usage message instead of unbound-variable errors:
#   - 'set -e': Exit immediately if a command exits with a non-zero status.
#   - 'set -u': Treat unset variables as an error and exit immediately.
#   - 'set -o pipefail': Return the exit code of the last command in the pipeline that failed.
set -euo pipefail

# Load shared library
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LIB_DIR="$SCRIPT_DIR/lib"

if [[ -f "$LIB_DIR/proxmox-common.sh" ]]; then
    source "$LIB_DIR/proxmox-common.sh"
else
    echo "Error: Cannot find shared library at $LIB_DIR/proxmox-common.sh" >&2
    exit 1
fi
echo "=== K3S VM Provisioning Script ===" | |
echo "Config file: $CONFIG_FILE" | |
if [[ -n "$NOCLOUD_ISO" ]]; then | |
echo "NoCloud ISO: $NOCLOUD_ISO (will replace cloud-init CD-ROM)" | |
fi | |
echo "This script will provision K3s VMs on Proxmox nodes" | |
echo "Script will exit on any error" | |
echo "===============================" | |
# Initialize script environment with enhanced error handling | |
init_script_env | |
# Function for preflight checks with enhanced error context | |
# | |
# Verifies that required Proxmox tools (qm and pvesh) are available | |
# before proceeding with VM provisioning. | |
# | |
# Arguments: None | |
# Returns: Exits with status 1 if required tools are not found | |
preflight_checks() {
    LAST_COMMAND_CONTEXT="Performing preflight checks for VM provisioning"
    echo "Performing preflight checks..."

    local required_tools="qm pvesh jq"
    for tool in $required_tools; do
        LAST_COMMAND="command -v $tool"
        if ! command -v "$tool" &> /dev/null; then
            case "$tool" in
                "qm")
                    echo "Error: 'qm' command not found. Please ensure Proxmox VE CLI tools are installed." >&2
                    echo "This command is required for VM management operations." >&2
                    ;;
                "pvesh")
                    echo "Error: 'pvesh' command not found. Please ensure the Proxmox VE API client is installed." >&2
                    echo "This command is required for querying Proxmox cluster information." >&2
                    ;;
                "jq")
                    echo "Error: 'jq' command not found. Please install jq for JSON parsing." >&2
                    echo "Install with: apt-get install jq" >&2
                    ;;
            esac
            exit 1
        fi
    done

    echo "✓ Required tools are available"
    LAST_COMMAND_CONTEXT=""
    LAST_COMMAND=""
}
# Function to restore cursor visibility
#
# Ensures the terminal cursor is visible, typically called
# after operations that might hide the cursor.
#
# Arguments: None
# Returns: None
restore_cursor() {
    printf "\033[?25h" # Show cursor
}

# Ensure cursor is restored on script exit
trap restore_cursor EXIT

# Function to safely clone a VM with enhanced error reporting
#
# Creates a new VM by cloning a template VM, with safety checks to prevent
# overwriting existing VMs with the same ID.
#
# Arguments:
#   $1 - Source VM ID to clone from
#   $2 - Target VM ID to create
#   $3 - Name for the new VM
#   $4 - Storage location for the VM
#   $5 - (Optional) "cleanup" to remove stray config files
#
# Returns: 0 if successful or VM already exists
#          Exits with status 1 if cloning fails
clone_vm_with_check() {
    local source_id=$1
    local target_id=$2
    local vm_name=$3
    local storage=$4
    local cleanup=${5:-""}
    local desc="Creating ${vm_name}..."
    local config_file="/etc/pve/nodes/$(hostname)/qemu-server/${target_id}.conf"

    # Set context for error handling
    LAST_COMMAND_CONTEXT="$desc (VM ID: $target_id)"
    printf "%-50s" "$desc"
    echo "$(date): $desc - Checking if VM $target_id already exists" >> "$LOG_FILE"

    # Check if VM exists (using shared library)
    LAST_COMMAND="qm list | grep \"^ *$target_id \""
    if resource_exists "$target_id" "vm"; then
        echo "[WARNING] VM $target_id already exists. Skipping."
        echo "$(date): $desc - VM $target_id already exists. Skipping." >> "$LOG_FILE"
        LAST_COMMAND_CONTEXT=""
        return 0
    fi

    # Check if config file exists but VM is not registered
    if [[ -f "$config_file" ]]; then
        if [[ "$cleanup" == "cleanup" ]]; then
            echo "[WARNING] VM $target_id config exists but VM not registered. Removing config file."
            echo "$(date): $desc - VM $target_id config exists but VM not registered. Removing config file." >> "$LOG_FILE"
            LAST_COMMAND="rm -f $config_file"
            rm -f "$config_file"
        else
            echo "[WARNING] VM $target_id config exists but VM not registered. Use --cleanup to remove."
            echo "$(date): $desc - VM $target_id config exists but VM not registered. Skipping." >> "$LOG_FILE"
            LAST_COMMAND_CONTEXT=""
            return 0
        fi
    fi

    # Verify source VM exists before attempting clone
    LAST_COMMAND="qm list | grep \"^ *$source_id \""
    if ! qm list | grep -q "^ *$source_id "; then
        echo "[FAILED]"
        echo "Error: Source VM ID $source_id does not exist. Cannot clone." >&2
        echo "Available VMs:" >&2
        qm list >&2
        exit 1
    fi

    local cmd="qm clone $source_id $target_id --name $vm_name --full true --storage $storage"
    run_with_spinner "$desc" "$cmd"
    LAST_COMMAND_CONTEXT=""
}
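# Example (illustrative; template ID 1000, storage "vmdata", and the
# k3s-<role>-NNN naming scheme come from this script):
#   clone_vm_with_check 1000 101 "k3s-server-001" "vmdata" "cleanup"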
# Function to check node availability and capacity
#
# Verifies that a Proxmox node is online before migration. The memory/disk
# capacity check (16GB RAM and 250GB disk minimum) is currently disabled.
#
# Arguments:
#   $1 - Name of the Proxmox node to check
#
# Returns: Exits with status 1 if the node is offline or doesn't exist
check_node() {
    local node=$1
    echo "Checking availability and capacity for node: $node..."

    # Check if node exists and is online
    if ! pvesh get /nodes --output-format=json | jq -e ".[] | select(.node == \"$node\" and .status == \"online\")" > /dev/null; then
        echo "Error: Node $node is not online or doesn't exist. Aborting migration." >&2
        exit 1
    fi

    # Get free memory and disk space
    local free_memory=$(pvesh get /nodes/$node/status --output-format=json | jq '.memory.free' | tr -d '"')
    local free_disk=$(pvesh get /nodes/$node/status --output-format=json | jq '.rootfs.free' | tr -d '"')

    ## Check if resources are sufficient (16GB RAM and 250GB disk minimum)
    # if [[ ${free_memory:-0} -lt 17179869184 || ${free_disk:-0} -lt 268435456000 ]]; then
    #     echo "Error: Node $node does not have sufficient resources. Aborting migration." >&2
    #     exit 1
    # fi

    echo "✓ Node $node is online (capacity check currently disabled)"
}
# Function to resize a VM disk if it exists
#
# Resizes the specified disk of a VM after checking if the disk exists.
# Executes the command on the remote Proxmox node via SSH.
#
# Arguments:
#   $1 - VM ID to modify
#   $2 - Disk identifier (e.g., scsi0)
#   $3 - New size for the disk (e.g., 256G)
#   $4 - Proxmox node name where the VM is located
#
# Returns: None, but outputs an error message if the disk doesn't exist
resize_disk() {
    local vm_id=$1
    local disk=$2
    local size=$3
    local node=$4
    local desc="Resizing disk $disk on VM $vm_id to $size on node $node"

    # Check if the disk exists via SSH
    if ssh -o StrictHostKeyChecking=no $node "qm config $vm_id | grep -q '$disk'"; then
        local cmd="ssh -o StrictHostKeyChecking=no $node 'qm resize $vm_id $disk $size'"
        run_with_spinner "$desc" "$cmd"
    else
        echo "$(date): Error: Disk $disk does not exist for VM $vm_id on node $node. Skipping resize." >> "$LOG_FILE"
        printf "%-50s[SKIPPED] - Disk does not exist\n" "$desc"
    fi
}
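# Example (illustrative; the VM ID and node name are hypothetical):
#   resize_disk 103 scsi0 256G pve2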
# Function to get VM configuration from JSON
#
# Extracts VM configuration details from the JSON config file.
#
# Arguments:
#   $1 - VM ID to look up
#   $2 - Property to extract (role, node, etc.)
#
# Returns: Outputs the requested property value
get_vm_config() {
    local vm_id=$1
    local property=$2
    jq -r ".nodes[] | select(.vmid == $vm_id) | .$property" "$CONFIG_FILE"
}
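# Example (assuming the illustrative config.json shown near the top of this script):
#   get_vm_config 101 "role"   # -> server
#   get_vm_config 101 "node"   # -> pve1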
# Function to get all VM IDs from config
#
# Returns a list of all VM IDs defined in the configuration.
#
# Arguments: None
# Returns: Outputs space-separated list of VM IDs
get_all_vm_ids() {
    jq -r '.nodes[].vmid' "$CONFIG_FILE" | tr '\n' ' '
}

# Function to get VM IDs by role
#
# Returns a list of VM IDs that match the specified role.
#
# Arguments:
#   $1 - Role to filter by (server, agent, storage)
#
# Returns: Outputs space-separated list of VM IDs
get_vm_ids_by_role() {
    local role=$1
    jq -r ".nodes[] | select(.role == \"$role\") | .vmid" "$CONFIG_FILE" | tr '\n' ' '
}
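# Example (assuming the same illustrative config.json; note the trailing space
# left by tr):
#   get_all_vm_ids               # -> "101 102 103 "
#   get_vm_ids_by_role storage   # -> "103 "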
# Function to configure VM resources based on role
#
# Applies role-specific resource configurations (RAM, CPU, disk).
#
# Arguments:
#   $1 - VM ID to configure
#   $2 - VM role (server, agent, storage)
#
# Returns: None
configure_vm_resources() {
    local vm_id=$1
    local role=$2

    case $role in
        "agent")
            echo "Configuring agent VM $vm_id (16GB RAM, 4 vCPUs)..."
            qm set "$vm_id" --memory 16384 --balloon 0 >> "$LOG_FILE" 2>&1
            qm set "$vm_id" --sockets 1 --cores 4 >> "$LOG_FILE" 2>&1
            ;;
        "storage")
            echo "Configuring storage VM $vm_id (4 vCPUs)..."
            qm set "$vm_id" --sockets 1 --cores 4 >> "$LOG_FILE" 2>&1
            ;;
        "server")
            echo "Server VM $vm_id using default configuration..."
            ;;
        *)
            echo "Warning: Unknown role '$role' for VM $vm_id. Using default configuration." >&2
            ;;
    esac
}
# Function to tag a VM with its role
#
# Applies a role-based tag to a VM for easier identification and management.
#
# Arguments:
#   $1 - VM ID to tag
#   $2 - VM role (server, agent, storage)
#
# Returns: None
tag_vm() {
    local vm_id=$1
    local role=$2
    local tag="k3s-${role}"
    local desc="Tagging VM $vm_id with role '$tag'"

    printf "%-50s" "$desc"
    echo "$(date): $desc" >> "$LOG_FILE"

    qm set "$vm_id" --tags "$tag" >> "$LOG_FILE" 2>&1 &
    local pid=$!
    spinner $pid
    wait $pid
    local status=$?

    if [ $status -eq 0 ]; then
        echo "[DONE]"
    else
        echo "[FAILED] - Check $LOG_FILE for details"
    fi
}
# Function to filter VM IDs based on user-provided list
#
# When the --vmid option is used, this function filters the complete VM ID list
# to only include the specified VMs.
# Note: the main flow below currently uses filter_ids from the shared library
# rather than this local helper.
#
# Arguments:
#   $1 - Space-separated list of all VM IDs from config
#   $2 - Comma-separated list of VM IDs to filter (from --vmid option)
#
# Returns: Outputs space-separated list of filtered VM IDs
filter_vm_ids() {
    local all_ids=($1)
    local filter_ids_str=$2

    # If no filter is provided, return all IDs
    if [[ -z "$filter_ids_str" ]]; then
        echo "$1"
        return
    fi

    # Convert the comma-separated list into an array for easier processing
    local filter_ids=(${filter_ids_str//,/ })
    local result=""

    # Loop through all IDs and check if they're in the filter list
    for id in "${all_ids[@]}"; do
        for filter_id in "${filter_ids[@]}"; do
            if [[ "$id" == "$filter_id" ]]; then
                result="$result $id"
                break
            fi
        done
    done

    echo "$result"
}
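# Example:
#   filter_vm_ids "101 102 103" "102,103"   # -> " 102 103"
#   filter_vm_ids "101 102 103" ""          # -> "101 102 103"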
# Function to configure VM auto-start behavior
#
# Configures whether a VM should automatically start on boot.
# By default, VMs are set to auto-start unless --disable-auto-start is specified.
# Note: the main flow below currently uses configure_autostart from the shared
# library rather than this local helper.
#
# Arguments:
#   $1 - VM ID to configure
#   $2 - VM role (for logging purposes)
#   $3 - Target node where the VM is located
#
# Returns: None
configure_vm_autostart() {
    local vm_id=$1
    local role=$2
    local target_node=$3

    if [[ "$DISABLE_AUTO_START" == "true" ]]; then
        local desc="Disabling auto-start for VM $vm_id on $target_node"
        local onboot_value=0
    else
        local desc="Enabling auto-start for VM $vm_id on $target_node"
        local onboot_value=1
    fi

    printf "%-50s" "$desc"
    echo "$(date): $desc" >> "$LOG_FILE"

    # Execute the command on the target node via SSH if it's remote, or locally if it's the current node
    local current_node=$(hostname)
    if [[ "$target_node" == "$current_node" ]]; then
        qm set "$vm_id" --onboot "$onboot_value" >> "$LOG_FILE" 2>&1 &
    else
        ssh -o StrictHostKeyChecking=no "$target_node" "qm set $vm_id --onboot $onboot_value" >> "$LOG_FILE" 2>&1 &
    fi

    local pid=$!
    spinner $pid
    wait $pid
    local status=$?

    if [ $status -eq 0 ]; then
        echo "[DONE]"
    else
        echo "[FAILED] - Check $LOG_FILE for details"
    fi
}
# Function to detect ISO storage in Proxmox
#
# Queries the Proxmox configuration to find available ISO storage locations.
#
# Arguments: None
# Returns: Outputs the first available ISO storage name
detect_iso_storage() {
    echo "Detecting Proxmox ISO storage..." >&2

    # Get all storage with ISO content type
    local iso_storages=$(pvesm status --content iso 2>/dev/null | tail -n +2 | awk '{print $1}' || echo "")
    if [[ -z "$iso_storages" ]]; then
        echo "Error: No ISO storage found in Proxmox configuration." >&2
        echo "Please configure at least one storage with 'iso' content type." >&2
        exit 1
    fi

    # Use the first available ISO storage
    local selected_storage=$(echo "$iso_storages" | head -n 1)
    echo "✓ Using ISO storage: $selected_storage" >&2
    echo "$selected_storage"
}
# Function to get storage path on a specific node
#
# Retrieves the filesystem path for a storage location on a Proxmox node.
# Can execute either locally or remotely via SSH depending on the target node.
#
# Arguments:
#   $1 - Storage name to query
#   $2 - Target Proxmox node name (optional, defaults to current node)
#
# Returns: Outputs the full path to the ISO directory for the storage
get_storage_path() {
    local storage_name=$1
    local target_node=${2:-$(hostname)}
    local current_node=$(hostname)

    echo "Getting storage path for $storage_name on node $target_node..." >&2

    # Get storage config and extract path
    local storage_path=""
    if [[ "$target_node" == "$current_node" ]]; then
        # Execute locally
        storage_path=$(pvesh get /storage/$storage_name --output-format=json 2>/dev/null | jq -r '.path // .export' 2>/dev/null || echo "")
    else
        # Execute remotely via SSH
        storage_path=$(ssh -o StrictHostKeyChecking=no "$target_node" "pvesh get /storage/$storage_name --output-format=json 2>/dev/null | jq -r '.path // .export' 2>/dev/null" || echo "")
    fi

    if [[ -z "$storage_path" ]]; then
        echo "Error: Could not determine path for storage '$storage_name' on node '$target_node'." >&2
        echo "Please check storage configuration with: pvesh get /storage/$storage_name" >&2
        exit 1
    fi

    # Ensure the iso subdirectory exists
    local iso_path="${storage_path}/template/iso"
    if [[ "$target_node" == "$current_node" ]]; then
        # Check locally
        if [[ ! -d "$iso_path" ]]; then
            echo "Creating ISO directory: $iso_path" >&2
            mkdir -p "$iso_path" 2>/dev/null || {
                echo "Error: Cannot create ISO directory. Check permissions." >&2
                echo "Attempted path: $iso_path" >&2
                exit 1
            }
        fi
    else
        # Check remotely via SSH
        if ! ssh -o StrictHostKeyChecking=no "$target_node" "[[ -d '$iso_path' ]]" 2>/dev/null; then
            echo "Creating ISO directory: $iso_path on $target_node" >&2
            ssh -o StrictHostKeyChecking=no "$target_node" "mkdir -p '$iso_path'" 2>/dev/null || {
                echo "Error: Cannot create ISO directory on $target_node. Check permissions." >&2
                echo "Attempted path: $iso_path" >&2
                exit 1
            }
        fi
    fi

    echo "✓ Storage path: $iso_path" >&2
    echo "$iso_path"
}
# Function to list available ISOs on a storage
#
# Lists all ISO files available in the specified storage location.
# Can execute either locally or remotely via SSH depending on the target node.
#
# Arguments:
#   $1 - Storage name to query
#   $2 - Target Proxmox node name (optional, defaults to current node)
#
# Returns: Outputs list of ISO files available in the storage
list_available_isos() {
    local storage_name=$1
    local target_node=${2:-$(hostname)}
    local current_node=$(hostname)

    echo "Listing available ISOs in storage $storage_name on node $target_node..." >&2

    # List ISOs using pvesm
    local iso_list=""
    if [[ "$target_node" == "$current_node" ]]; then
        # Execute locally
        iso_list=$(pvesm list "$storage_name" --content iso 2>/dev/null | tail -n +2 | awk '{print $1}' | sed "s|^$storage_name:iso/||" || echo "")
    else
        # Execute remotely via SSH
        iso_list=$(ssh -o StrictHostKeyChecking=no "$target_node" "pvesm list $storage_name --content iso 2>/dev/null | tail -n +2 | awk '{print \$1}' | sed 's|^$storage_name:iso/||'" || echo "")
    fi

    if [[ -z "$iso_list" ]]; then
        echo "No ISOs found in storage $storage_name on node $target_node" >&2
        return 0
    fi

    echo "Available ISOs:" >&2
    echo "$iso_list" | while read -r iso; do
        echo "  - $iso" >&2
    done

    echo "$iso_list"
}
# Function to check if a specific ISO exists in storage
#
# Checks whether a specific ISO file exists in the given storage location.
#
# Arguments:
#   $1 - Storage name to check
#   $2 - ISO filename to look for
#   $3 - Target Proxmox node name (optional, defaults to current node)
#
# Returns: 0 if ISO exists, 1 if not found
check_iso_exists() {
    local storage_name=$1
    local iso_filename=$2
    local target_node=${3:-$(hostname)}

    local available_isos=$(list_available_isos "$storage_name" "$target_node")
    if echo "$available_isos" | grep -q "^$iso_filename$"; then
        echo "✓ ISO $iso_filename found in storage $storage_name on node $target_node" >&2
        return 0
    else
        echo "✗ ISO $iso_filename not found in storage $storage_name on node $target_node" >&2
        return 1
    fi
}
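# Example (illustrative; "nfs" is the storage name used elsewhere in this script,
# and the node name is hypothetical):
#   if check_iso_exists "nfs" "nocloud-amd64.iso" "pve2"; then
#       echo "ISO is available on pve2"
#   fi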
# Function to configure VM with NoCloud ISO
#
# Replaces the cloud-init CD-ROM with a custom NoCloud ISO from NFS storage.
# This allows using custom cloud-init configurations or Talos ISOs.
# Also replaces the existing Ubuntu cloud image disk with an empty disk to prevent
# Ubuntu from booting after reboot, configures boot order to prioritize disk first,
# and sets CPU arguments for Talos v1.0+ compatibility (x86-64-v2 microarchitecture).
# Properly cleans up the old disk storage to avoid leaving unused disk images.
#
# Arguments:
#   $1 - VM ID to configure
#   $2 - ISO filename (without storage prefix)
#   $3 - Target node where the VM is located
#
# Returns: None
configure_nocloud_iso() {
    local vm_id=$1
    local iso_filename=$2
    local target_node=$3
    local current_node=$(hostname)

    # Determine disk size based on VM role
    local role=$(get_vm_config "$vm_id" "role")
    local disk_size="10" # Default size
    if [[ "$role" == "storage" ]]; then
        disk_size="256" # Storage VMs need larger disks
    fi

    local desc="Configuring NoCloud ISO for VM $vm_id (${disk_size}G disk)"
    printf "%-50s" "$desc"
    echo "$(date): $desc - ISO: $iso_filename, Role: $role, Disk size: ${disk_size}G on node $target_node" >> "$LOG_FILE"

    # First, verify the ISO exists in NFS storage on the target node
    if ! check_iso_exists "nfs" "$iso_filename" "$target_node"; then
        echo "[FAILED] - ISO $iso_filename not found in NFS storage on $target_node"
        echo "$(date): Error: ISO $iso_filename not found in NFS storage on $target_node" >> "$LOG_FILE"
        exit 1
    fi

    # Get current disk configuration before making changes
    local current_disk_config=""
    if [[ "$target_node" == "$current_node" ]]; then
        current_disk_config=$(qm config "$vm_id" | grep '^scsi0:' || echo "")
    else
        current_disk_config=$(ssh -o StrictHostKeyChecking=no "$target_node" "qm config $vm_id | grep '^scsi0:'" || echo "")
    fi
    echo "$(date): $desc - Current scsi0 config: $current_disk_config" >> "$LOG_FILE"

    # Extract the current disk identifier (e.g., vmdata:vm-1211-disk-0)
    local old_disk_id=""
    if [[ -n "$current_disk_config" ]]; then
        # Extract disk ID from config like "scsi0: vmdata:vm-1211-disk-0,size=10G"
        old_disk_id=$(echo "$current_disk_config" | sed -n 's/scsi0: \([^,]*\).*/\1/p')
        echo "$(date): $desc - Old disk ID: $old_disk_id" >> "$LOG_FILE"
    fi

    # Configure the CD-ROM with the NoCloud ISO, boot order, and CPU args for Talos compatibility
    local iso_path="nfs:iso/$iso_filename"
    local cpu_args="-cpu kvm64,+cx16,+lahf_lm,+popcnt,+sse3,+ssse3,+sse4.1,+sse4.2"
    # Step 1: Delete the existing disk
    echo "$(date): $desc - Step 1: Deleting existing scsi0 disk" >> "$LOG_FILE"
    local delete_cmd=""
    if [[ "$target_node" == "$current_node" ]]; then
        delete_cmd="qm set $vm_id --delete scsi0"
    else
        delete_cmd="ssh -o StrictHostKeyChecking=no $target_node 'qm set $vm_id --delete scsi0'"
    fi
    echo "$(date): $desc - DELETE COMMAND: $delete_cmd" >> "$LOG_FILE"

    if ! bash -c "$delete_cmd" >> "$LOG_FILE" 2>&1; then
        echo "[FAILED] - Could not delete existing disk"
        echo "$(date): $desc - Failed to delete existing scsi0 disk" >> "$LOG_FILE"
        exit 1
    fi

    # Step 2: Clean up the old disk storage immediately after deletion
    if [[ -n "$old_disk_id" && "$old_disk_id" =~ ^[^:]+:vm-[0-9]+-disk-[0-9]+$ ]]; then
        echo "$(date): $desc - Step 2: Cleaning up old disk storage: $old_disk_id" >> "$LOG_FILE"
        local cleanup_cmd=""
        if [[ "$target_node" == "$current_node" ]]; then
            cleanup_cmd="pvesm free $old_disk_id"
        else
            cleanup_cmd="ssh -o StrictHostKeyChecking=no $target_node 'pvesm free $old_disk_id'"
        fi
        echo "$(date): $desc - CLEANUP COMMAND: $cleanup_cmd" >> "$LOG_FILE"

        if ! bash -c "$cleanup_cmd" >> "$LOG_FILE" 2>&1; then
            echo "$(date): $desc - Warning: Could not clean up old disk $old_disk_id (may have been already removed)" >> "$LOG_FILE"
        else
            echo "$(date): $desc - Successfully cleaned up old disk $old_disk_id" >> "$LOG_FILE"
        fi

        # Wait a moment for storage cleanup to complete
        echo "$(date): $desc - Waiting for storage cleanup to complete..." >> "$LOG_FILE"
        sleep 2
    else
        echo "$(date): $desc - Old disk ID '$old_disk_id' is not valid for cleanup (expected format: storage:vm-id-disk-number)" >> "$LOG_FILE"
    fi

    # Step 3: Configure new disk, ISO, boot order, and CPU args
    echo "$(date): $desc - Step 3: Creating new ${disk_size}G disk and configuring VM" >> "$LOG_FILE"
    local config_cmd=""
    if [[ "$target_node" == "$current_node" ]]; then
        config_cmd="qm set $vm_id --scsi0 vmdata:$disk_size --ide2 $iso_path,media=cdrom --boot order=scsi0\\;ide2 --args '$cpu_args'"
    else
        config_cmd="ssh -o StrictHostKeyChecking=no $target_node 'qm set $vm_id --scsi0 vmdata:$disk_size --ide2 $iso_path,media=cdrom --boot order=scsi0\\;ide2 --args \"$cpu_args\"'"
    fi
    echo "$(date): $desc - CONFIG COMMAND: $config_cmd" >> "$LOG_FILE"

    bash -c "$config_cmd" >> "$LOG_FILE" 2>&1 &
    local pid=$!
    spinner $pid
    wait $pid
    local status=$?

    if [ $status -ne 0 ]; then
        echo "[FAILED] - Check $LOG_FILE for details"
        echo "$(date): $desc - Failed to configure VM with new disk and ISO" >> "$LOG_FILE"
        exit 1
    fi

    echo "[DONE]"
    echo "$(date): VM $vm_id configured with NoCloud ISO: $iso_filename, new ${disk_size}G disk, boot order: scsi0,ide2, and CPU args for Talos compatibility" >> "$LOG_FILE"

    # Verify the new configuration
    local new_disk_config=""
    if [[ "$target_node" == "$current_node" ]]; then
        new_disk_config=$(qm config "$vm_id" | grep '^scsi0:' || echo "")
    else
        new_disk_config=$(ssh -o StrictHostKeyChecking=no "$target_node" "qm config $vm_id | grep '^scsi0:'" || echo "")
    fi
    echo "$(date): $desc - New scsi0 config: $new_disk_config" >> "$LOG_FILE"

    # Check if the new disk reused the old identifier
    if [[ -n "$old_disk_id" && "$new_disk_config" =~ $old_disk_id ]]; then
        echo "$(date): $desc - Successfully reused old disk identifier: $old_disk_id" >> "$LOG_FILE"
    else
        echo "$(date): $desc - New disk created with different identifier (this is normal)" >> "$LOG_FILE"
    fi
}
# Function to verify NoCloud ISO availability
#
# Checks if the specified NoCloud ISO exists in NFS storage on all target nodes
# before proceeding with VM configuration.
#
# Arguments:
#   $1 - ISO filename to verify
#   $2 - Array of VM IDs to check target nodes for
#
# Returns: Exits with status 1 if ISO not found on any required node
verify_nocloud_iso_availability() {
    local iso_filename=$1
    local vm_ids=("${@:2}")

    echo "Verifying NoCloud ISO availability..."

    # Get unique target nodes from the VM configuration
    local target_nodes=()
    for vm_id in "${vm_ids[@]}"; do
        if [[ -n "$vm_id" ]]; then
            local target_node=$(get_vm_config "$vm_id" "node")
            if [[ -n "$target_node" ]]; then
                # Add to array if not already present
                if [[ ! " ${target_nodes[@]} " =~ " ${target_node} " ]]; then
                    target_nodes+=("$target_node")
                fi
            fi
        fi
    done

    # Check ISO availability on each target node
    local missing_nodes=()
    for node in "${target_nodes[@]}"; do
        echo "Checking ISO $iso_filename on node $node..."
        if ! check_iso_exists "nfs" "$iso_filename" "$node"; then
            missing_nodes+=("$node")
        fi
    done

    if [[ ${#missing_nodes[@]} -gt 0 ]]; then
        echo "Error: NoCloud ISO '$iso_filename' not found on the following nodes:" >&2
        for node in "${missing_nodes[@]}"; do
            echo "  - $node" >&2
        done
        echo "" >&2
        echo "Please ensure the ISO is available in NFS storage on all target nodes." >&2
        echo "You can use the talos_download.sh script to download ISOs to storage." >&2
        exit 1
    fi

    echo "✓ NoCloud ISO $iso_filename verified on all target nodes"
}
# Create output log file with enhanced error handling
init_log_file "vm_creation_output.log" "Starting VM provisioning from config: $CONFIG_FILE"

# Check if template VM exists with better error reporting
LAST_COMMAND_CONTEXT="Verifying template VM availability"
LAST_COMMAND="qm list | grep \"^ *1000 \""
if ! qm list | grep -q "^ *1000 "; then
    echo "Error: Source VM ID 1000 does not exist. Cannot proceed with VM provisioning." >&2
    echo "" >&2
    echo "Available VMs:" >&2
    qm list >&2
    echo "" >&2
    echo "Please ensure template VM 1000 exists before running this script." >&2
    exit 1
fi
echo "✓ Template VM 1000 found"
LAST_COMMAND_CONTEXT=""
LAST_COMMAND=""

# Get all VM IDs from config
all_vm_ids_str=$(get_all_vm_ids)
all_vm_ids=($(echo "$all_vm_ids_str"))

# Filter VM IDs if --vmid is provided (using shared library)
if [[ -n "$VM_IDS" ]]; then
    filtered_vm_ids_str=$(filter_ids "$all_vm_ids_str" "$VM_IDS")
    filtered_vm_ids=($(echo "$filtered_vm_ids_str"))
    echo "Filtering VMs to provision: ${filtered_vm_ids[*]} (from list of ${#all_vm_ids[@]} total VMs)"
    all_vm_ids=("${filtered_vm_ids[@]}")
else
    echo "Found ${#all_vm_ids[@]} VMs to provision: ${all_vm_ids[*]}"
fi

# Verify NoCloud ISO availability if specified
if [[ -n "$NOCLOUD_ISO" ]]; then
    verify_nocloud_iso_availability "$NOCLOUD_ISO" "${all_vm_ids[@]}"
fi
# Create all VMs
echo "[1/7] Creating VMs from template..."
for vm_id in "${all_vm_ids[@]}"; do
    if [[ -n "$vm_id" ]]; then
        role=$(get_vm_config "$vm_id" "role")
        vm_name="k3s-${role}-$(printf "%03d" $((vm_id % 10)))"
        clone_vm_with_check 1000 "$vm_id" "$vm_name" "vmdata" "$CLEANUP"
    fi
done

# Tag VMs with their roles
echo "[2/7] Tagging VMs with roles..."
for vm_id in "${all_vm_ids[@]}"; do
    if [[ -n "$vm_id" ]]; then
        role=$(get_vm_config "$vm_id" "role")
        tag_vm "$vm_id" "$role"
    fi
done

# Configure VMs by role
echo "[3/7] Configuring VM resources..."
for vm_id in "${all_vm_ids[@]}"; do
    if [[ -n "$vm_id" ]]; then
        role=$(get_vm_config "$vm_id" "role")
        configure_vm_resources "$vm_id" "$role"
    fi
done
# Migrate VMs to target nodes
echo "[4/7] Migrating VMs to target nodes..."
for vm_id in "${all_vm_ids[@]}"; do
    if [[ -n "$vm_id" ]]; then
        target_node=$(get_vm_config "$vm_id" "node")
        vm_name="k3s-$(get_vm_config "$vm_id" "role")-$(printf "%03d" $((vm_id % 10)))"
        # Get current hostname instead of assuming we're on pve1
        current_node=$(hostname)

        if [[ "$target_node" != "$current_node" ]]; then
            check_node "$target_node"

            # Check if VM already exists on target node
            printf "%-50s" "Checking if $vm_name exists on $target_node..."
            if ssh -o StrictHostKeyChecking=no "$target_node" "qm list | grep -q \"^ *$vm_id \"" 2>/dev/null; then
                echo "[EXISTS] - VM $vm_id already exists on $target_node, skipping migration"
                echo "$(date): VM $vm_id already exists on $target_node, skipping migration" >> "$LOG_FILE"
                continue
            else
                echo "[NOT FOUND] - Proceeding with migration"
            fi

            cmd="qm migrate $vm_id $target_node --with-local-disks"
            printf "%-50s" "Migrating $vm_name to $target_node"
            echo "$(date): Migrating $vm_name to $target_node - COMMAND: $cmd" >> "$LOG_FILE"

            bash -c "$cmd" >> "$LOG_FILE" 2>&1 &
            pid=$!
            spinner $pid
            wait $pid
            status=$?

            if [ $status -eq 0 ]; then
                echo "[DONE]"
            else
                echo "[FAILED] - Migration of $vm_name to $target_node failed. Check $LOG_FILE for details."
                echo "$(date): Migration of $vm_name to $target_node failed." >> "$LOG_FILE"
                # Don't exit with failure if the error might be that the VM already exists
                if grep -q "File exists" "$LOG_FILE"; then
                    echo "It appears the VM might already exist on the target node. Continuing..."
                    echo "$(date): VM might already exist on target node. Continuing despite migration error." >> "$LOG_FILE"
                else
                    exit 1
                fi
            fi
        else
            echo "VM $vm_name is already on node $current_node - skipping migration"
            echo "$(date): VM $vm_name is already on node $current_node - skipping migration" >> "$LOG_FILE"
        fi
    fi
done
# Resize storage VM disks
echo "[5/7] Configuring storage disks..."
# Skip this step if using a NoCloud ISO - disk sizing will be handled in the NoCloud configuration
if [[ -z "$NOCLOUD_ISO" ]]; then
    # Use the filtered VM list instead of getting all storage VMs from the config
    storage_vm_ids=()
    for vm_id in "${all_vm_ids[@]}"; do
        if [[ -n "$vm_id" ]]; then
            role=$(get_vm_config "$vm_id" "role")
            if [[ "$role" == "storage" ]]; then
                storage_vm_ids+=("$vm_id")
            fi
        fi
    done

    for vm_id in "${storage_vm_ids[@]}"; do
        if [[ -n "$vm_id" ]]; then
            target_node=$(get_vm_config "$vm_id" "node")
            resize_disk "$vm_id" scsi0 256G "$target_node"
        fi
    done
else
    echo "Skipping storage disk resize - will be handled in NoCloud ISO configuration"
fi
# Configure VM auto-start behavior (using shared library)
echo "[6/7] Configuring VM auto-start behavior..."
for vm_id in "${all_vm_ids[@]}"; do
    if [[ -n "$vm_id" ]]; then
        role=$(get_vm_config "$vm_id" "role")
        target_node=$(get_vm_config "$vm_id" "node")
        configure_autostart "$vm_id" "vm" "$target_node" "$DISABLE_AUTO_START"
    fi
done

# Configure NoCloud ISO if specified
if [[ -n "$NOCLOUD_ISO" ]]; then
    echo "[7/7] Configuring NoCloud ISO for VMs..."
    for vm_id in "${all_vm_ids[@]}"; do
        if [[ -n "$vm_id" ]]; then
            target_node=$(get_vm_config "$vm_id" "node")
            configure_nocloud_iso "$vm_id" "$NOCLOUD_ISO" "$target_node"
        fi
    done
else
    echo "[7/7] Skipping NoCloud ISO configuration (not specified)"
fi

echo "===== VM Provisioning Complete ====="
echo "All K3s VMs have been provisioned successfully."
if [[ -n "$NOCLOUD_ISO" ]]; then
    echo "All VMs have been configured with NoCloud ISO: $NOCLOUD_ISO"
fi
if [[ "$DISABLE_AUTO_START" == "true" ]]; then
    echo "Auto-start on boot has been disabled for all VMs."
else
    echo "Auto-start on boot has been enabled for all VMs."
fi
echo "$(date): VM Provisioning Complete - All K3s VMs have been provisioned successfully." >> "$LOG_FILE"