Created
March 24, 2025 06:31
-
-
Save wallentx/830b002c69a968d7ed20aa71e305a173 to your computer and use it in GitHub Desktop.
Fuck them btrfs drives
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Abort on the first unhandled command failure.
set -e
## Description:
## This script is an orchestrator that scans for btrfs volumes under PLOTS_BASE only once
## and then spawns one worker process per btrfs volume.
##
## Each worker is responsible for draining its assigned btrfs volume—one plot at a time—
## by transferring a *.plot file (from anywhere within that volume) to a suitable target drive.
## The worker computes the relative path of each plot using realpath so that the directory
## structure is preserved on the target drive.
##
## Destination drives (candidate target drives) are all mount points under PLOTS_BASE that
## are not btrfs. Before transferring, a worker atomically reserves the target drive by using
## mkdir to create a ".drain_busy" directory. This prevents multiple workers from writing to
## the same drive concurrently.
##
## When a worker finds no more *.plot files, it unmounts its volume and exits. When all workers
## are done, the orchestrator exits.
##
## A shutdown cleanup routine is installed via trap to remove stale busy markers and to kill all
## child processes if the parent is terminated.
##
## Logs are written to RSYNC_LOG and OPERATION_LOG; messages are also sent to systemd-cat.
########################
# Configuration
########################
PLOTS_BASE="/mnt/plot"                          # Base directory containing all mount points (e.g., plots1..167)
RSYNC_LOG="/tmp/btrfs_drain_rsync.log"          # Receives rsync's stdout/stderr for every transfer
OPERATION_LOG="/tmp/btrfs_drain_operations.log" # Receives every message passed to log()
LOCK_FILE="/tmp/btrfs_drain.lock"               # Holds the PID of the running orchestrator
OWNER="william:william"                         # Owner to set on remounted volumes (if needed)
########################
# Helper Functions
########################
# Append a timestamped message to OPERATION_LOG and send it to the systemd
# journal under the tag "btrfs-drain".
# Arguments: $@ - message text (several arguments are joined with spaces)
# Outputs:   nothing on stdout, so functions whose stdout is captured with
#            $(...) can call log without polluting their result
# Returns:   always 0; a logging problem must never abort the drain under
#            `set -e`
log() {
  local line
  line="[$(date '+%Y-%m-%d %H:%M:%S')] $*"
  # Best effort: an unwritable log file is reported by the shell on stderr
  # but does not stop the caller.
  printf '%s\n' "$line" >> "$OPERATION_LOG" || true
  # systemd-cat may be absent (non-systemd host); skip the journal then.
  if command -v systemd-cat &>/dev/null; then
    printf '%s\n' "$line" | systemd-cat -t btrfs-drain || true
  fi
  return 0
}
# Ensure required commands exist.
# Arguments: $@ - optional list of command names to check; when omitted, the
#                 external commands this script invokes are checked
# Outputs:   one log line per missing command (all of them, not just the first)
# Returns:   0 when everything is present; otherwise terminates the shell
#            with exit status 1
check_deps() {
  local -a required=("$@")
  local -a missing=()
  local cmd

  if [ "${#required[@]}" -eq 0 ]; then
    # find, ps and pkill are used by the script as well, so check them too.
    required=(rsync findmnt find dd df stat sudo umount realpath shuf ps pkill)
  fi

  for cmd in "${required[@]}"; do
    if ! command -v "$cmd" &>/dev/null; then
      missing+=("$cmd")
    fi
  done

  if [ "${#missing[@]}" -gt 0 ]; then
    for cmd in "${missing[@]}"; do
      log "ERROR: Missing required command: $cmd"
    done
    exit 1
  fi
}
# Print the candidate target drives, one mount point per line.
# A candidate is a mount point strictly below PLOTS_BASE whose file system is
# NOT btrfs. PLOTS_BASE itself and look-alike siblings (e.g. /mnt/plotter when
# PLOTS_BASE is /mnt/plot) are not candidates.
# Globals:   PLOTS_BASE (read)
# Outputs:   mount points on stdout, in findmnt order
get_target_drives() {
  local btrfs_mounts mount_point
  local base="${PLOTS_BASE%/}"

  # A failing findmnt (e.g. no btrfs mounted at all) simply means that
  # nothing gets excluded below.
  btrfs_mounts=$(findmnt --noheadings --list -t btrfs -o TARGET) || true

  while IFS= read -r mount_point; do
    case "$mount_point" in
      "$base"/*) ;;
      *) continue ;;
    esac
    # -F: compare the path literally; dots etc. must not act as regex.
    if ! grep -Fxq -- "$mount_point" <<< "$btrfs_mounts"; then
      printf '%s\n' "$mount_point"
    fi
  done < <(findmnt --noheadings --list -o TARGET)
}
# Run a quick write test (one synchronous 4K block) on a given directory.
# Arguments: $1 - directory to probe
# Returns:   0 if the write succeeded, 1 otherwise (the failure is logged)
test_io() {
  local test_dir="$1"
  # $$ is the orchestrator's PID in every worker subshell; adding BASHPID
  # keeps concurrent probes of the same drive from sharing one file name.
  local probe="$test_dir/test-write.$$.$BASHPID"
  local status=0

  dd if=/dev/zero of="$probe" bs=4K count=1 oflag=sync 2>/dev/null || status=1
  # Remove the probe on failure too: dd may have created it before failing.
  rm -f -- "$probe"

  if [ "$status" -ne 0 ]; then
    log "I/O test failed on $test_dir"
    return 1
  fi
  return 0
}
# Check if a directory has enough free space for a plot plus a safety margin.
# Arguments: $1 - directory on the file system to check
#            $2 - plot size in bytes
#            $3 - optional safety margin in bytes (default: 1 GiB)
# Returns:   0 if available bytes > plot size + margin; 1 otherwise, including
#            when df fails or either number cannot be parsed
has_enough_space() {
  local target_dir="$1"
  local plot_size="$2"
  local margin="${3:-1073741824}"
  local avail

  avail=$(df --output=avail -B1 -- "$target_dir" | tail -n 1) || return 1
  # df right-aligns the column; drop the padding before validating.
  avail="${avail//[[:space:]]/}"

  # Refuse to guess: an unreadable size or df result means "not enough".
  if ! [[ "$avail" =~ ^[0-9]+$ && "$plot_size" =~ ^[0-9]+$ && "$margin" =~ ^[0-9]+$ ]]; then
    return 1
  fi

  if (( avail > plot_size + margin )); then
    return 0
  fi
  return 1
}
# For a given plot file, select a target drive that is not busy and passes
# the I/O and free-space tests.
# Arguments: $1 - path of the plot file to be moved
# Outputs:   the chosen mount point on stdout, or an empty line if no drive
#            currently qualifies
# Returns:   always 0, so `var=$(find_target_for_plot ...)` is safe under
#            `set -e`; callers test for an empty result instead
find_target_for_plot() {
  local plot_path="$1"
  local plot_size candidate
  local target=""
  local -a candidates=()

  if ! plot_size=$(stat --printf='%s' -- "$plot_path"); then
    # Without a size the space check is meaningless; report "no target".
    log "Cannot determine size of $plot_path; skipping target selection" >&2
    printf '\n'
    return 0
  fi

  # Randomize the order of candidate target drives using shuf. Reading whole
  # lines into an array keeps mount points containing spaces intact.
  mapfile -t candidates < <(get_target_drives | shuf)

  for candidate in "${candidates[@]}"; do
    [ -n "$candidate" ] || continue
    # Skip if this target is already reserved (busy marker is a directory).
    if [ -d "$candidate/.drain_busy" ]; then
      continue
    fi
    if test_io "$candidate" && has_enough_space "$candidate" "$plot_size"; then
      target="$candidate"
      break
    fi
  done

  printf '%s\n' "$target"
}
# Cleanup function: remove the ".drain_busy" reservation marker from every
# candidate target drive. Intended for start-up and shutdown, when no
# transfer should legitimately hold a marker.
# Outputs:   progress messages via log
# Returns:   0; a marker that cannot be removed is logged, not fatal
shutdown_cleanup() {
  local drive
  local -a drives=()

  log "Running shutdown cleanup..."
  # Read whole lines so mount points containing spaces are not split.
  mapfile -t drives < <(get_target_drives)

  for drive in "${drives[@]}"; do
    [ -n "$drive" ] || continue
    if [ -d "$drive/.drain_busy" ]; then
      log "Removing stale busy marker from $drive"
      rm -rf -- "$drive/.drain_busy" \
        || log "WARNING: Could not remove busy marker from $drive"
    fi
    # Optionally remove any temporary rsync files (if needed)
    # find "$drive" -maxdepth 1 -type f -name ".~tmp~*" -exec rm -f {} \;
  done

  log "Shutdown cleanup completed."
  return 0
}
# Prevent multiple instances of the orchestrator.
# Creates LOCK_FILE containing this shell's PID. The file is created with
# noclobber, so two instances starting together cannot both acquire it.
# A lock whose PID is missing, malformed or no longer alive is treated as
# stale and replaced.
# Globals:   LOCK_FILE (read)
# Returns:   0 once the lock is held; terminates the shell with status 1 if
#            another live instance owns the lock or it cannot be acquired
check_running() {
  local existing_pid attempt

  for attempt in 1 2; do
    # With noclobber the redirection fails if the file already exists.
    if ( set -o noclobber; printf '%s\n' "$$" > "$LOCK_FILE" ) 2>/dev/null; then
      return 0
    fi

    existing_pid=$(cat -- "$LOCK_FILE" 2>/dev/null) || existing_pid=""
    if [[ "$existing_pid" =~ ^[0-9]+$ ]] && ps -p "$existing_pid" >/dev/null 2>&1; then
      log "Another instance is already running with PID $existing_pid. Exiting."
      exit 1
    fi

    log "Found stale lock file. Removing."
    rm -f -- "$LOCK_FILE"
  done

  log "ERROR: Could not acquire lock file $LOCK_FILE. Exiting."
  exit 1
}
########################
# Worker Function
########################
# Each worker is given one btrfs volume (source_volume) and repeatedly:
#   - Finds one *.plot file on that volume.
#   - Computes its relative path (using realpath --relative-to) from the source volume.
#   - Finds a target drive that meets the criteria.
#   - Atomically reserves the target drive by creating a busy marker directory.
#   - Transfers the plot via rsync (with --remove-source-files) to the target drive,
#     preserving the relative directory structure.
#   - Removes the busy marker and continues with the next plot.
#   - When no plot file is left, the worker unmounts the volume and exits.
#
# Meant to run as a background job (`drain_worker "$vol" &`): it never
# returns, it terminates its shell with `exit`.
# Arguments: $1 - mount point of the btrfs volume to drain
# Globals:   RSYNC_LOG (rsync output is appended to it)
# Exits:     0 after the volume was drained and unmounted;
#            1 on a termination signal, when the volume cannot be scanned or
#              unmounted, or after 5 consecutive failed transfers
drain_worker() {
  local source_volume="$1"
  local plot_file rel_path target_subdir target_drive target_dir
  local busy_marker=""    # marker this worker currently owns, if any
  local transfer_ok
  local failures=0        # consecutive failed transfers
  local -r max_failures=5

  log "Worker started for btrfs volume: $source_volume"

  # On termination kill this worker's children (like rsync) and release the
  # reservation it holds. BASHPID is this worker's own PID; $$ would be the
  # orchestrator's PID here and would target every sibling worker instead.
  trap '
    log "Worker for $source_volume received termination signal. Killing child processes."
    pkill -P "$BASHPID" || true
    if [ -n "$busy_marker" ]; then rm -rf -- "$busy_marker"; fi
    exit 1
  ' SIGTERM SIGINT

  while true; do
    # -print -quit stops at the first match and, unlike `find | head`, lets
    # us see find's exit status. A failed scan that found nothing must not
    # be mistaken for "volume is empty", or plots would be left behind on an
    # unmounted volume.
    if ! plot_file=$(find "$source_volume" -type f -name "*.plot" -print -quit) \
      && [ -z "$plot_file" ]; then
      log "ERROR: Could not scan $source_volume for plot files; leaving it mounted."
      exit 1
    fi

    if [ -z "$plot_file" ]; then
      log "No more plot files in $source_volume. Unmounting volume."
      if ! sudo umount "$source_volume"; then
        log "ERROR: Failed to unmount $source_volume."
        exit 1
      fi
      log "Worker for $source_volume exiting."
      exit 0
    fi

    # Compute the relative path using realpath.
    if ! rel_path=$(realpath --relative-to="$source_volume" "$plot_file"); then
      log "Could not resolve $plot_file relative to $source_volume. Retrying..."
      sleep 5
      continue
    fi
    target_subdir=$(dirname "$rel_path")

    target_drive=$(find_target_for_plot "$plot_file") || target_drive=""
    if [ -z "$target_drive" ]; then
      log "No available target drive for $plot_file from $source_volume. Waiting..."
      sleep 30
      continue
    fi

    # Atomically reserve the target drive: mkdir fails if the marker exists.
    if ! mkdir "$target_drive/.drain_busy" 2>/dev/null; then
      log "Failed to reserve target drive $target_drive for $plot_file; already reserved. Retrying..."
      sleep 5
      continue
    fi
    # Record ownership only after mkdir succeeded, so the signal handler can
    # never delete a marker that belongs to another worker.
    busy_marker="$target_drive/.drain_busy"

    target_dir="$target_drive/$target_subdir"
    transfer_ok=1
    if mkdir -p "$target_dir"; then
      log "Transferring $plot_file from $source_volume to $target_dir/"
      rsync -a --progress --remove-source-files "$plot_file" "$target_dir/" >> "$RSYNC_LOG" 2>&1 \
        || transfer_ok=0
    else
      transfer_ok=0
    fi

    # Always release the reservation, also after a failed transfer;
    # otherwise the target stays blocked for every worker.
    rm -rf -- "$busy_marker"
    busy_marker=""

    if [ "$transfer_ok" -eq 1 ]; then
      failures=0
      continue
    fi

    failures=$((failures + 1))
    log "ERROR: Transfer of $plot_file to $target_dir/ failed ($failures of $max_failures consecutive failures)."
    if [ "$failures" -ge "$max_failures" ]; then
      log "ERROR: Giving up on $source_volume after $failures consecutive failed transfers."
      exit 1
    fi
    sleep 30
  done
}
########################
# Orchestrator (Main)
########################
# Tear-down helper for main: stop remaining workers, remove busy markers and
# release the lock file. Installed by main for SIGTERM, SIGINT and EXIT.
# Globals: _DRAIN_WORKER_PIDS (read, then emptied), LOCK_FILE (read)
_orchestrator_cleanup() {
  local pid
  # Disarm first so the handler runs once and cannot re-enter itself.
  trap - EXIT
  trap '' SIGTERM SIGINT
  for pid in "${_DRAIN_WORKER_PIDS[@]}"; do
    # A worker handles signals only between commands, so stop its children
    # (e.g. a long-running rsync) explicitly before signalling the worker.
    # Only known worker PIDs are targeted, never the whole process group.
    pkill -TERM -P "$pid" 2>/dev/null || true
    kill -TERM "$pid" 2>/dev/null || true
  done
  _DRAIN_WORKER_PIDS=()
  shutdown_cleanup || true
  rm -f -- "$LOCK_FILE"
}

# Orchestrator entry point: spawn one drain_worker per btrfs volume mounted
# strictly below PLOTS_BASE and wait for all of them.
# Globals: PLOTS_BASE (read), LOCK_FILE (read), _DRAIN_WORKER_PIDS (written)
# Returns: 0 if there was nothing to do or every worker succeeded,
#          1 if at least one worker exited with an error
main() {
  local vol pid
  local base="${PLOTS_BASE%/}"
  local failed_workers=0
  local -a btrfs_volumes=()
  # Global on purpose: the EXIT trap runs after main's locals are gone.
  _DRAIN_WORKER_PIDS=()

  log "Starting BTRFS Volume Drain Orchestrator"
  check_deps
  check_running

  # From here on the lock is ours; make sure every exit path cleans up.
  trap '_orchestrator_cleanup; exit 143' SIGTERM
  trap '_orchestrator_cleanup; exit 130' SIGINT
  trap '_orchestrator_cleanup' EXIT

  # Cleanup any stale busy markers before starting.
  shutdown_cleanup

  # Filter in the shell instead of with grep: an empty result must not abort
  # under `set -e`, and /mnt/plotter must not match a base of /mnt/plot.
  while IFS= read -r vol; do
    case "$vol" in
      "$base"/*) btrfs_volumes+=("$vol") ;;
    esac
  done < <(findmnt --noheadings --list -t btrfs -o TARGET)

  if [ "${#btrfs_volumes[@]}" -eq 0 ]; then
    log "No btrfs volumes found. Exiting."
    return 0
  fi

  for vol in "${btrfs_volumes[@]}"; do
    drain_worker "$vol" &
    _DRAIN_WORKER_PIDS+=("$!")
    log "Spawned worker for $vol with PID $!"
  done

  for pid in "${_DRAIN_WORKER_PIDS[@]}"; do
    # Test the status explicitly: under `set -e` a bare `wait` on a failed
    # worker would end the orchestrator and take all other workers with it.
    if ! wait "$pid"; then
      failed_workers=$((failed_workers + 1))
      log "Worker with PID $pid exited with an error."
    fi
  done
  # Every worker has been reaped; their PIDs must not be signalled later.
  _DRAIN_WORKER_PIDS=()

  log "All workers have finished. Orchestrator exiting."
  if [ "$failed_workers" -gt 0 ]; then
    return 1
  fi
  return 0
}
# Run the orchestrator, forwarding any command-line arguments.
main "$@"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
`journalctl -o cat --no-hostname -t btrfs-drain` | |
Starting BTRFS Volume Drain Orchestrator | |
Found stale lock file. Removing. | |
Running shutdown cleanup... | |
Shutdown cleanup completed. | |
Spawned worker for /mnt/plot/plots115 with PID 1948378 | |
Worker started for btrfs volume: /mnt/plot/plots115 | |
Spawned worker for /mnt/plot/plots114 with PID 1948387 | |
Worker started for btrfs volume: /mnt/plot/plots114 | |
Spawned worker for /mnt/plot/plots118 with PID 1948403 | |
Worker started for btrfs volume: /mnt/plot/plots118 | |
Worker started for btrfs volume: /mnt/plot/plots103 | |
Spawned worker for /mnt/plot/plots103 with PID 1948425 | |
Spawned worker for /mnt/plot/plots101 with PID 1948449 | |
Worker started for btrfs volume: /mnt/plot/plots101 | |
Spawned worker for /mnt/plot/plots102 with PID 1948476 | |
Worker started for btrfs volume: /mnt/plot/plots102 | |
Worker started for btrfs volume: /mnt/plot/plots108 | |
Spawned worker for /mnt/plot/plots108 with PID 1948512 | |
Spawned worker for /mnt/plot/plots109 with PID 1948561 | |
Worker started for btrfs volume: /mnt/plot/plots109 | |
Spawned worker for /mnt/plot/plots48 with PID 1948615 | |
Worker started for btrfs volume: /mnt/plot/plots48 | |
Worker started for btrfs volume: /mnt/plot/plots106 | |
Spawned worker for /mnt/plot/plots106 with PID 1948673 | |
Spawned worker for /mnt/plot/plots35 with PID 1948758 | |
Worker started for btrfs volume: /mnt/plot/plots35 | |
Worker started for btrfs volume: /mnt/plot/plots155 | |
Spawned worker for /mnt/plot/plots155 with PID 1948837 | |
Spawned worker for /mnt/plot/plots98 with PID 1948927 | |
Worker started for btrfs volume: /mnt/plot/plots98 | |
Spawned worker for /mnt/plot/plots107 with PID 1949004 | |
Worker started for btrfs volume: /mnt/plot/plots107 | |
Spawned worker for /mnt/plot/plots162 with PID 1949091 | |
Worker started for btrfs volume: /mnt/plot/plots162 | |
Worker started for btrfs volume: /mnt/plot/plots104 | |
Spawned worker for /mnt/plot/plots104 with PID 1949178 | |
Worker started for btrfs volume: /mnt/plot/plots152 | |
Spawned worker for /mnt/plot/plots152 with PID 1949280 | |
Spawned worker for /mnt/plot/plots149 with PID 1949404 | |
Worker started for btrfs volume: /mnt/plot/plots149 | |
Spawned worker for /mnt/plot/plots153 with PID 1949510 | |
Worker started for btrfs volume: /mnt/plot/plots153 | |
Spawned worker for /mnt/plot/plots151 with PID 1949614 | |
Worker started for btrfs volume: /mnt/plot/plots151 | |
Spawned worker for /mnt/plot/plots165 with PID 1949733 | |
Worker started for btrfs volume: /mnt/plot/plots165 | |
Spawned worker for /mnt/plot/plots167 with PID 1949848 | |
Worker started for btrfs volume: /mnt/plot/plots167 | |
Spawned worker for /mnt/plot/plots34 with PID 1949976 | |
Worker started for btrfs volume: /mnt/plot/plots34 | |
Spawned worker for /mnt/plot/plots157 with PID 1950101 | |
Worker started for btrfs volume: /mnt/plot/plots157 | |
Worker started for btrfs volume: /mnt/plot/plots166 | |
Spawned worker for /mnt/plot/plots166 with PID 1950242 | |
Spawned worker for /mnt/plot/plots161 with PID 1950386 | |
Worker started for btrfs volume: /mnt/plot/plots161 | |
Transferring /mnt/plot/plots115/cuda/plot-k32-c07-2025-02-17-23-05-a5580ce6f5c2dc88fdef6dad1350b9f8f5b7c6c44abde89979933b441cb10567.plot from /mnt/plot/plots115 to /mnt/plot/plots32/cuda/ | |
Transferring /mnt/plot/plots102/cuda/plot-k32-c07-2025-02-16-03-02-a0c5a82dec2625e34edf3251607b4b4d79a9e2686d11501b4e3ca21ffad01d2d.plot from /mnt/plot/plots102 to /mnt/plot/plots24/cuda/ | |
Transferring /mnt/plot/plots114/cuda/plot-k32-c07-2025-02-18-15-42-dd0a4af20646645cbefa4aa18895907aedbd3a52456bdb855b8c340b8591f340.plot from /mnt/plot/plots114 to /mnt/plot/plots29/cuda/ | |
Transferring /mnt/plot/plots109/cuda/plot-k32-c07-2025-02-24-04-25-80f3785303472192fdf64112ee5f2012745a877bd20d59b89c73267c7fdabc0c.plot from /mnt/plot/plots109 to /mnt/plot/plots57/cuda/ | |
Transferring /mnt/plot/plots101/cuda/plot-k32-c07-2025-02-17-03-25-d1621e72dbda760a84e3e985a74d79bce6e6d67c153c88b4b3c0bc52620d9df6.plot from /mnt/plot/plots101 to /mnt/plot/plots66/cuda/ | |
Transferring /mnt/plot/plots155/cuda/plot-k32-c07-2025-03-07-08-34-941406bd519130a340380dcd1dfde9093e58abf76f812f05e8ca9a1df2ce88ca.plot from /mnt/plot/plots155 to /mnt/plot/plots112/cuda/ | |
Transferring /mnt/plot/plots48/cuda/plot-k32-c07-2025-02-09-03-30-89c0ba8454f501a48176d616b5e51eb5db8f8cf92683a6b2daa3ca544102df91.plot from /mnt/plot/plots48 to /mnt/plot/plots2/cuda/ | |
Transferring /mnt/plot/plots98/cuda/plot-k32-c07-2025-02-13-18-51-ac56bfe08246e710e281995a039f8d0a3193c79b2371d0b7b48ba2333b63d365.plot from /mnt/plot/plots98 to /mnt/plot/plots1/cuda/ | |
Transferring /mnt/plot/plots106/cuda/plot-k32-c07-2025-02-13-14-37-74f509414f6fe86b3820fffd3c199d1510df54c1bf59331b9810c0454ac988cd.plot from /mnt/plot/plots106 to /mnt/plot/plots4/cuda/ | |
Transferring /mnt/plot/plots151/cuda/plot-k32-c07-2025-03-07-01-34-41f20f363c660b42c446d1b0c2dc9c2f32516f1ee9b7f0285fd26f0e5d3ab07c.plot from /mnt/plot/plots151 to /mnt/plot/plots22/cuda/ | |
Transferring /mnt/plot/plots35/cuda/plot-k32-c07-2025-02-22-05-23-2eeb7921d9a424811bd1a6521b92e0403b463d18e8edee1a21ae11302c487508.plot from /mnt/plot/plots35 to /mnt/plot/plots36/cuda/ | |
Transferring /mnt/plot/plots104/cuda/plot-k32-c07-2025-02-18-12-44-4a02aff481011cdfacbf386d6af07a976fc921a7196cac9959d403ea0d9fb3ab.plot from /mnt/plot/plots104 to /mnt/plot/plots67/cuda/ | |
Transferring /mnt/plot/plots153/cuda/plot-k32-c07-2025-03-03-16-14-6448ac0a2415e2d8de8ac70eb951f010e599ce5f9ea8d1a3e34bad60af6698e9.plot from /mnt/plot/plots153 to /mnt/plot/plots126/cuda/ | |
Transferring /mnt/plot/plots34/cuda/plot-k32-c07-2025-02-21-17-31-e7ee199eb7bf056df23381d52f1db85e212c882b392da88c24263c3b042b7fe5.plot from /mnt/plot/plots34 to /mnt/plot/plots64/cuda/ | |
Transferring /mnt/plot/plots107/cuda/plot-k32-c07-2025-02-05-05-26-d53c4db0473c788ab032f778e2e1bcc5c5acf93149b909defb398502ed7e2aa7.plot from /mnt/plot/plots107 to /mnt/plot/plots50/cuda/ | |
Transferring /mnt/plot/plots152/cuda/plot-k32-c07-2025-02-28-15-26-80b2aa062fd2bc132c7a4ffe1f10e09ee2f5341591180a3baeb908d3b6b01d35.plot from /mnt/plot/plots152 to /mnt/plot/plots85/cuda/ | |
Transferring /mnt/plot/plots103/cuda/plot-k32-c07-2025-02-13-11-16-19dadca860fcc4fb20a9d0ee3aeddb885f1656e9e257d77db4283943b0b65c31.plot from /mnt/plot/plots103 to /mnt/plot/plots9/cuda/ | |
Transferring /mnt/plot/plots118/cuda/plot-k32-c07-2025-02-14-11-34-93e719fd72f7cb71aad340eef0ad6020642e783ed16996e9ee208785f76be14b.plot from /mnt/plot/plots118 to /mnt/plot/plots12/cuda/ | |
Transferring /mnt/plot/plots149/cuda/plot-k32-c07-2025-03-07-06-07-f03a4ee2ca9d34666d477232402d9f338360c9ece94fc81c8e7b53a7ae5843e1.plot from /mnt/plot/plots149 to /mnt/plot/plots111/cuda/ | |
Transferring /mnt/plot/plots162/cuda/plot-k32-c07-2025-03-05-12-05-2a0fd171d391bbb78a79b49429844f872cc67194175ad13b40a7ac469cb247a8.plot from /mnt/plot/plots162 to /mnt/plot/plots40/cuda/ | |
Failed to reserve target drive /mnt/plot/plots111 for /mnt/plot/plots165/cuda/plot-k32-c07-2025-03-01-03-00-bdeb7ccb3f240eab16ffa74f4da14fc49168feb0eee9c9c05872afae9173a0b4.plot; already reserved. Retrying... | |
Transferring /mnt/plot/plots166/cuda/plot-k32-c07-2025-03-06-18-40-71bd656db552dda8332ce249718dece22978d613db5e2f5cd2c2be83b0ce663f.plot from /mnt/plot/plots166 to /mnt/plot/plots83/cuda/ | |
Transferring /mnt/plot/plots167/cuda/plot-k32-c07-2025-03-04-18-51-77eec43ffc76a8f8c92402cfdebad5a8c5f5d3f2eede8780620587192d3f9a6c.plot from /mnt/plot/plots167 to /mnt/plot/plots62/cuda/ | |
Transferring /mnt/plot/plots161/cuda/plot-k32-c07-2025-03-06-05-34-6af7d9e22aaed781e16665b2bfe41cd7686f836dcd1b77e0f5214c9cd9a2eebd.plot from /mnt/plot/plots161 to /mnt/plot/plots74/cuda/ | |
Transferring /mnt/plot/plots108/cuda/plot-k32-c07-2025-02-23-02-20-c30e7d3b5f21d2f91a78e76c61ad121278ba7ff846f01368794a4b92c45d40c3.plot from /mnt/plot/plots108 to /mnt/plot/plots100/cuda/ | |
Transferring /mnt/plot/plots157/cuda/plot-k32-c07-2025-03-07-22-32-cf4ed289bd1bf72332a7d00cbad18fbc056a34c6aaaabffbf4c089d5a9d06687.plot from /mnt/plot/plots157 to /mnt/plot/plots84/cuda/ | |
Transferring /mnt/plot/plots165/cuda/plot-k32-c07-2025-03-01-03-00-bdeb7ccb3f240eab16ffa74f4da14fc49168feb0eee9c9c05872afae9173a0b4.plot from /mnt/plot/plots165 to /mnt/plot/plots159/cuda/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment