@wallentx
Created March 24, 2025 06:31
Fuck them btrfs drives
#!/usr/bin/env bash
set -e
## Description:
## This script is an orchestrator that scans for btrfs volumes under PLOTS_BASE only once
## and then spawns one worker process per btrfs volume.
##
## Each worker is responsible for draining its assigned btrfs volume—one plot at a time—
## by transferring a *.plot file (from anywhere within that volume) to a suitable target drive.
## The worker computes the relative path of each plot using realpath so that the directory
## structure is preserved on the target drive.
##
## Destination drives (candidate target drives) are all mount points under PLOTS_BASE that
## are not btrfs. Before transferring, a worker atomically reserves the target drive by using
## mkdir to create a ".drain_busy" directory. This prevents multiple workers from writing to
## the same drive concurrently.
##
## When a worker finds no more *.plot files, it unmounts its volume and exits. When all workers
## are done, the orchestrator exits.
##
## A shutdown cleanup routine is installed via trap to remove stale busy markers and to kill all
## child processes if the parent is terminated.
##
## Logs are written to RSYNC_LOG and OPERATION_LOG; messages are also sent to systemd-cat.
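##
## Illustrative layout (an assumption inferred from the sample journal output below,
## not something the script enforces): sources and targets are sibling mounts under
## PLOTS_BASE, and plots may sit in subdirectories such as "cuda/":
##   /mnt/plot/plots115            btrfs      -> source, drained by a worker
##   /mnt/plot/plots32             non-btrfs  -> candidate target
##   plots115/cuda/<name>.plot     is copied to plots32/cuda/<name>.plot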
########################
# Configuration
########################
PLOTS_BASE="/mnt/plot" # Base directory containing all mount points (e.g., plots1..167)
RSYNC_LOG="/tmp/btrfs_drain_rsync.log"
OPERATION_LOG="/tmp/btrfs_drain_operations.log"
LOCK_FILE="/tmp/btrfs_drain.lock"
OWNER="william:william" # Owner to set on remounted volumes (if needed)
########################
# Helper Functions
########################
# Log messages to console, file, and systemd journal.
log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" \
        | tee -a "$OPERATION_LOG" \
        | systemd-cat -t btrfs-drain
}
# Ensure required commands exist.
check_deps() {
    # find and pkill are also required (used by the workers below).
    for cmd in rsync findmnt dd df stat sudo umount realpath shuf find pkill; do
        if ! command -v "$cmd" &>/dev/null; then
            log "ERROR: Missing required command: $cmd"
            exit 1
        fi
    done
}
# Return a list of candidate target drives.
# Candidate target drives are those mount points under PLOTS_BASE that are NOT btrfs.
get_target_drives() {
    local btrfs_drives
    btrfs_drives=$(findmnt --noheadings --list -t btrfs -o TARGET)
    findmnt --noheadings --list -o TARGET | grep "^$PLOTS_BASE" | while read -r mount; do
        if ! echo "$btrfs_drives" | grep -qx "$mount"; then
            echo "$mount"
        fi
    done
}
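# Example (hypothetical mounts): if /mnt/plot/plots115 is btrfs and /mnt/plot/plots32
# is mounted under PLOTS_BASE with any other filesystem, this prints /mnt/plot/plots32
# but not /mnt/plot/plots115.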
# Run a quick I/O test on a given directory.
test_io() {
    local test_dir="$1"
    local testfile="$test_dir/test-write.$$"
    if dd if=/dev/zero of="$testfile" bs=4K count=1 oflag=sync 2>/dev/null; then
        rm -f "$testfile"
        return 0
    else
        log "I/O test failed on $test_dir"
        return 1
    fi
}
# Check if a directory has enough free space (plot_size plus a 1GB margin).
has_enough_space() {
    local target_dir="$1"
    local plot_size="$2"
    local avail
    avail=$(df --output=avail -B1 "$target_dir" | tail -n 1)
    if [ "$avail" -gt $((plot_size + 1073741824)) ]; then
        return 0
    else
        return 1
    fi
}
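# Worked example with made-up numbers: for a 90,000,000,000-byte plot, the target must
# report more than 90,000,000,000 + 1,073,741,824 = 91,073,741,824 bytes available in df.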
# For a given plot file, select a target drive that is not busy,
# passes I/O and free space tests, and then return it.
find_target_for_plot() {
    local plot_path="$1"
    local plot_size
    plot_size=$(stat --printf='%s' "$plot_path")
    local target=""
    # Randomize the order of candidate target drives using shuf.
    for d in $(get_target_drives | shuf); do
        # Skip if this target is already reserved (check for the busy marker as a directory).
        if [ -d "$d/.drain_busy" ]; then
            continue
        fi
        if test_io "$d" && has_enough_space "$d" "$plot_size"; then
            target="$d"
            break
        fi
    done
    echo "$target"
}
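# Note: the busy-marker check above is only advisory; the authoritative reservation is
# the atomic mkdir in drain_worker, so two workers racing past this check is resolved
# by the mkdir failure-and-retry path there.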
# Cleanup function to remove stale busy markers and temporary rsync files.
shutdown_cleanup() {
    log "Running shutdown cleanup..."
    for d in $(get_target_drives); do
        if [ -d "$d/.drain_busy" ]; then
            log "Removing stale busy marker from $d"
            rm -rf "$d/.drain_busy"
        fi
        # Optionally remove any temporary rsync files (if needed)
        # find "$d" -maxdepth 1 -type f -name ".~tmp~*" -exec rm -f {} \;
    done
    log "Shutdown cleanup completed."
}
# Prevent multiple instances of the orchestrator.
check_running() {
    if [ -e "$LOCK_FILE" ]; then
        local PID
        PID=$(cat "$LOCK_FILE")
        if ps -p "$PID" >/dev/null; then
            log "Another instance is already running with PID $PID. Exiting."
            exit 1
        else
            log "Found stale lock file. Removing."
            rm -f "$LOCK_FILE"
        fi
    fi
    echo $$ > "$LOCK_FILE"
}
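# The PID-file check above is not race-free, and the lock file is never removed on exit
# (hence the "Found stale lock file" message in the sample output below). A sketch of an
# flock(1)-based alternative, not used by this script:
#   exec 9>"$LOCK_FILE"
#   flock -n 9 || { log "Another instance is already running. Exiting."; exit 1; }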
########################
# Worker Function
########################
# Each worker is given one btrfs volume (source_volume) and repeatedly:
# - Finds one *.plot file on that volume.
# - Computes its relative path (using realpath --relative-to) from the source volume.
# - Finds a target drive that meets the criteria.
# - Atomically reserves the target drive by creating a busy marker directory.
# - Transfers the plot via rsync (with --remove-source-files) to the target drive,
# preserving the relative directory structure.
# - Removes the busy marker and continues with the next plot.
# - When no plot file is left, the worker unmounts the volume and exits.
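# Example of the resulting path mapping (taken from the sample journal output below):
#   plot_file = /mnt/plot/plots115/cuda/plot-k32-c07-...plot
#   rel_path  = cuda/plot-k32-c07-...plot
#   target    = /mnt/plot/plots32   =>  rsync destination /mnt/plot/plots32/cuda/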
drain_worker() {
    local source_volume="$1"
    log "Worker started for btrfs volume: $source_volume"
    # Install a trap so that if this worker is terminated, any child processes (like rsync) are killed.
    # $BASHPID (not $$) is the PID of this backgrounded worker subshell; $$ would still
    # refer to the orchestrator.
    trap 'log "Worker for $source_volume received termination signal. Killing child processes."; pkill -P $BASHPID; exit 1' SIGTERM SIGINT
    while true; do
        local plot_file
        plot_file=$(find "$source_volume" -type f -name "*.plot" | head -n 1)
        if [ -z "$plot_file" ]; then
            log "No more plot files in $source_volume. Unmounting volume."
            sudo umount "$source_volume"
            log "Worker for $source_volume exiting."
            exit 0
        fi
        # Compute the relative path using realpath.
        local rel_path
        rel_path=$(realpath --relative-to="$source_volume" "$plot_file")
        local target_subdir
        target_subdir=$(dirname "$rel_path")
        local target_drive
        target_drive=$(find_target_for_plot "$plot_file")
        if [ -z "$target_drive" ]; then
            log "No available target drive for $plot_file from $source_volume. Waiting..."
            sleep 30
            continue
        fi
        # Atomically reserve the target drive.
        if ! mkdir "$target_drive/.drain_busy" 2>/dev/null; then
            log "Failed to reserve target drive $target_drive for $plot_file; already reserved. Retrying..."
            sleep 5
            continue
        fi
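        # (mkdir either creates the marker or fails because it already exists, so at most
        # one worker holds a given target drive at a time; a worker that loses the race
        # simply retries above.)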
        local target_dir="$target_drive/$target_subdir"
        mkdir -p "$target_dir"
        log "Transferring $plot_file from $source_volume to $target_dir/"
        # Guard the transfer so a failed rsync does not abort the worker (set -e) and
        # leave the busy marker behind.
        if ! rsync -a --progress --remove-source-files "$plot_file" "$target_dir/" >> "$RSYNC_LOG" 2>&1; then
            log "rsync failed for $plot_file (see $RSYNC_LOG); will retry."
            rm -rf "$target_drive/.drain_busy"
            sleep 10
            continue
        fi
        # Remove the busy marker.
        rm -rf "$target_drive/.drain_busy"
    done
}
########################
# Orchestrator (Main)
########################
main() {
    log "Starting BTRFS Volume Drain Orchestrator"
    check_deps
    check_running
    # Set a trap in the parent to perform shutdown cleanup and kill the entire process group.
    trap 'shutdown_cleanup; kill -- -$$' SIGTERM SIGINT EXIT
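    # Note: `kill -- -$$` signals the whole process group, which assumes this script is
    # its own process group leader (typically the case when it is launched as a job from
    # an interactive shell or run as a systemd unit). The trap also fires on normal EXIT.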
    # Cleanup any stale busy markers before starting.
    shutdown_cleanup
    local btrfs_volumes
    btrfs_volumes=$(findmnt --noheadings --list -t btrfs -o TARGET | grep "^$PLOTS_BASE")
    if [ -z "$btrfs_volumes" ]; then
        log "No btrfs volumes found. Exiting."
        exit 0
    fi
    local pids=()
    while IFS= read -r vol; do
        drain_worker "$vol" &
        pids+=($!)
        log "Spawned worker for $vol with PID ${pids[-1]}"
    done <<< "$btrfs_volumes"
    for pid in "${pids[@]}"; do
        wait "$pid"
    done
    log "All workers have finished. Orchestrator exiting."
}
main
`journalctl -o cat --no-hostname -t btrfs-drain`
Starting BTRFS Volume Drain Orchestrator
Found stale lock file. Removing.
Running shutdown cleanup...
Shutdown cleanup completed.
Spawned worker for /mnt/plot/plots115 with PID 1948378
Worker started for btrfs volume: /mnt/plot/plots115
Spawned worker for /mnt/plot/plots114 with PID 1948387
Worker started for btrfs volume: /mnt/plot/plots114
Spawned worker for /mnt/plot/plots118 with PID 1948403
Worker started for btrfs volume: /mnt/plot/plots118
Worker started for btrfs volume: /mnt/plot/plots103
Spawned worker for /mnt/plot/plots103 with PID 1948425
Spawned worker for /mnt/plot/plots101 with PID 1948449
Worker started for btrfs volume: /mnt/plot/plots101
Spawned worker for /mnt/plot/plots102 with PID 1948476
Worker started for btrfs volume: /mnt/plot/plots102
Worker started for btrfs volume: /mnt/plot/plots108
Spawned worker for /mnt/plot/plots108 with PID 1948512
Spawned worker for /mnt/plot/plots109 with PID 1948561
Worker started for btrfs volume: /mnt/plot/plots109
Spawned worker for /mnt/plot/plots48 with PID 1948615
Worker started for btrfs volume: /mnt/plot/plots48
Worker started for btrfs volume: /mnt/plot/plots106
Spawned worker for /mnt/plot/plots106 with PID 1948673
Spawned worker for /mnt/plot/plots35 with PID 1948758
Worker started for btrfs volume: /mnt/plot/plots35
Worker started for btrfs volume: /mnt/plot/plots155
Spawned worker for /mnt/plot/plots155 with PID 1948837
Spawned worker for /mnt/plot/plots98 with PID 1948927
Worker started for btrfs volume: /mnt/plot/plots98
Spawned worker for /mnt/plot/plots107 with PID 1949004
Worker started for btrfs volume: /mnt/plot/plots107
Spawned worker for /mnt/plot/plots162 with PID 1949091
Worker started for btrfs volume: /mnt/plot/plots162
Worker started for btrfs volume: /mnt/plot/plots104
Spawned worker for /mnt/plot/plots104 with PID 1949178
Worker started for btrfs volume: /mnt/plot/plots152
Spawned worker for /mnt/plot/plots152 with PID 1949280
Spawned worker for /mnt/plot/plots149 with PID 1949404
Worker started for btrfs volume: /mnt/plot/plots149
Spawned worker for /mnt/plot/plots153 with PID 1949510
Worker started for btrfs volume: /mnt/plot/plots153
Spawned worker for /mnt/plot/plots151 with PID 1949614
Worker started for btrfs volume: /mnt/plot/plots151
Spawned worker for /mnt/plot/plots165 with PID 1949733
Worker started for btrfs volume: /mnt/plot/plots165
Spawned worker for /mnt/plot/plots167 with PID 1949848
Worker started for btrfs volume: /mnt/plot/plots167
Spawned worker for /mnt/plot/plots34 with PID 1949976
Worker started for btrfs volume: /mnt/plot/plots34
Spawned worker for /mnt/plot/plots157 with PID 1950101
Worker started for btrfs volume: /mnt/plot/plots157
Worker started for btrfs volume: /mnt/plot/plots166
Spawned worker for /mnt/plot/plots166 with PID 1950242
Spawned worker for /mnt/plot/plots161 with PID 1950386
Worker started for btrfs volume: /mnt/plot/plots161
Transferring /mnt/plot/plots115/cuda/plot-k32-c07-2025-02-17-23-05-a5580ce6f5c2dc88fdef6dad1350b9f8f5b7c6c44abde89979933b441cb10567.plot from /mnt/plot/plots115 to /mnt/plot/plots32/cuda/
Transferring /mnt/plot/plots102/cuda/plot-k32-c07-2025-02-16-03-02-a0c5a82dec2625e34edf3251607b4b4d79a9e2686d11501b4e3ca21ffad01d2d.plot from /mnt/plot/plots102 to /mnt/plot/plots24/cuda/
Transferring /mnt/plot/plots114/cuda/plot-k32-c07-2025-02-18-15-42-dd0a4af20646645cbefa4aa18895907aedbd3a52456bdb855b8c340b8591f340.plot from /mnt/plot/plots114 to /mnt/plot/plots29/cuda/
Transferring /mnt/plot/plots109/cuda/plot-k32-c07-2025-02-24-04-25-80f3785303472192fdf64112ee5f2012745a877bd20d59b89c73267c7fdabc0c.plot from /mnt/plot/plots109 to /mnt/plot/plots57/cuda/
Transferring /mnt/plot/plots101/cuda/plot-k32-c07-2025-02-17-03-25-d1621e72dbda760a84e3e985a74d79bce6e6d67c153c88b4b3c0bc52620d9df6.plot from /mnt/plot/plots101 to /mnt/plot/plots66/cuda/
Transferring /mnt/plot/plots155/cuda/plot-k32-c07-2025-03-07-08-34-941406bd519130a340380dcd1dfde9093e58abf76f812f05e8ca9a1df2ce88ca.plot from /mnt/plot/plots155 to /mnt/plot/plots112/cuda/
Transferring /mnt/plot/plots48/cuda/plot-k32-c07-2025-02-09-03-30-89c0ba8454f501a48176d616b5e51eb5db8f8cf92683a6b2daa3ca544102df91.plot from /mnt/plot/plots48 to /mnt/plot/plots2/cuda/
Transferring /mnt/plot/plots98/cuda/plot-k32-c07-2025-02-13-18-51-ac56bfe08246e710e281995a039f8d0a3193c79b2371d0b7b48ba2333b63d365.plot from /mnt/plot/plots98 to /mnt/plot/plots1/cuda/
Transferring /mnt/plot/plots106/cuda/plot-k32-c07-2025-02-13-14-37-74f509414f6fe86b3820fffd3c199d1510df54c1bf59331b9810c0454ac988cd.plot from /mnt/plot/plots106 to /mnt/plot/plots4/cuda/
Transferring /mnt/plot/plots151/cuda/plot-k32-c07-2025-03-07-01-34-41f20f363c660b42c446d1b0c2dc9c2f32516f1ee9b7f0285fd26f0e5d3ab07c.plot from /mnt/plot/plots151 to /mnt/plot/plots22/cuda/
Transferring /mnt/plot/plots35/cuda/plot-k32-c07-2025-02-22-05-23-2eeb7921d9a424811bd1a6521b92e0403b463d18e8edee1a21ae11302c487508.plot from /mnt/plot/plots35 to /mnt/plot/plots36/cuda/
Transferring /mnt/plot/plots104/cuda/plot-k32-c07-2025-02-18-12-44-4a02aff481011cdfacbf386d6af07a976fc921a7196cac9959d403ea0d9fb3ab.plot from /mnt/plot/plots104 to /mnt/plot/plots67/cuda/
Transferring /mnt/plot/plots153/cuda/plot-k32-c07-2025-03-03-16-14-6448ac0a2415e2d8de8ac70eb951f010e599ce5f9ea8d1a3e34bad60af6698e9.plot from /mnt/plot/plots153 to /mnt/plot/plots126/cuda/
Transferring /mnt/plot/plots34/cuda/plot-k32-c07-2025-02-21-17-31-e7ee199eb7bf056df23381d52f1db85e212c882b392da88c24263c3b042b7fe5.plot from /mnt/plot/plots34 to /mnt/plot/plots64/cuda/
Transferring /mnt/plot/plots107/cuda/plot-k32-c07-2025-02-05-05-26-d53c4db0473c788ab032f778e2e1bcc5c5acf93149b909defb398502ed7e2aa7.plot from /mnt/plot/plots107 to /mnt/plot/plots50/cuda/
Transferring /mnt/plot/plots152/cuda/plot-k32-c07-2025-02-28-15-26-80b2aa062fd2bc132c7a4ffe1f10e09ee2f5341591180a3baeb908d3b6b01d35.plot from /mnt/plot/plots152 to /mnt/plot/plots85/cuda/
Transferring /mnt/plot/plots103/cuda/plot-k32-c07-2025-02-13-11-16-19dadca860fcc4fb20a9d0ee3aeddb885f1656e9e257d77db4283943b0b65c31.plot from /mnt/plot/plots103 to /mnt/plot/plots9/cuda/
Transferring /mnt/plot/plots118/cuda/plot-k32-c07-2025-02-14-11-34-93e719fd72f7cb71aad340eef0ad6020642e783ed16996e9ee208785f76be14b.plot from /mnt/plot/plots118 to /mnt/plot/plots12/cuda/
Transferring /mnt/plot/plots149/cuda/plot-k32-c07-2025-03-07-06-07-f03a4ee2ca9d34666d477232402d9f338360c9ece94fc81c8e7b53a7ae5843e1.plot from /mnt/plot/plots149 to /mnt/plot/plots111/cuda/
Transferring /mnt/plot/plots162/cuda/plot-k32-c07-2025-03-05-12-05-2a0fd171d391bbb78a79b49429844f872cc67194175ad13b40a7ac469cb247a8.plot from /mnt/plot/plots162 to /mnt/plot/plots40/cuda/
Failed to reserve target drive /mnt/plot/plots111 for /mnt/plot/plots165/cuda/plot-k32-c07-2025-03-01-03-00-bdeb7ccb3f240eab16ffa74f4da14fc49168feb0eee9c9c05872afae9173a0b4.plot; already reserved. Retrying...
Transferring /mnt/plot/plots166/cuda/plot-k32-c07-2025-03-06-18-40-71bd656db552dda8332ce249718dece22978d613db5e2f5cd2c2be83b0ce663f.plot from /mnt/plot/plots166 to /mnt/plot/plots83/cuda/
Transferring /mnt/plot/plots167/cuda/plot-k32-c07-2025-03-04-18-51-77eec43ffc76a8f8c92402cfdebad5a8c5f5d3f2eede8780620587192d3f9a6c.plot from /mnt/plot/plots167 to /mnt/plot/plots62/cuda/
Transferring /mnt/plot/plots161/cuda/plot-k32-c07-2025-03-06-05-34-6af7d9e22aaed781e16665b2bfe41cd7686f836dcd1b77e0f5214c9cd9a2eebd.plot from /mnt/plot/plots161 to /mnt/plot/plots74/cuda/
Transferring /mnt/plot/plots108/cuda/plot-k32-c07-2025-02-23-02-20-c30e7d3b5f21d2f91a78e76c61ad121278ba7ff846f01368794a4b92c45d40c3.plot from /mnt/plot/plots108 to /mnt/plot/plots100/cuda/
Transferring /mnt/plot/plots157/cuda/plot-k32-c07-2025-03-07-22-32-cf4ed289bd1bf72332a7d00cbad18fbc056a34c6aaaabffbf4c089d5a9d06687.plot from /mnt/plot/plots157 to /mnt/plot/plots84/cuda/
Transferring /mnt/plot/plots165/cuda/plot-k32-c07-2025-03-01-03-00-bdeb7ccb3f240eab16ffa74f4da14fc49168feb0eee9c9c05872afae9173a0b4.plot from /mnt/plot/plots165 to /mnt/plot/plots159/cuda/