Created
December 22, 2024 22:53
-
-
Save brong/6a23fee1480f2d62b8a18ade5aea667a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
####### | |
# A command line tool for zfs replication at Fastmail | |
# | |
# TODO: add more error state handling (e.g. remote down) | |
# TODO: add force-failover-here mode | |
# TODO: add force-reset-remote mode (split brain recovery) | |
# | |
# This script uses the following two get/set keys on the filesystem:
# zfsrepl:primary (boolean: 'yes' or 'no') | |
# zfsrepl:peer (hostname of remote server) | |
# | |
# As well as protecting the filesystem by using 'readonly' (boolean: 'off' or 'on') | |
# | |
# We create snapshots of the form: zfsrepl_(timestamp); of which we keep the most recent 5 | |
# | |
# The 'sync' command will only create a new snapshot if there is a delta between the most | |
# recent snapshot and the current state of the filesystem. | |
# | |
use v5.20; | |
use feature qw(signatures); | |
use Getopt::Long::Descriptive; | |
use Process::Status; | |
use IO::LockedFile; | |
use DateTime; | |
# ---------------------------------------------------------------------------
# Main driver: parse options, validate the command and filesystem name, take
# the global lock, then dispatch to the matching do_<cmd> sub.
# ---------------------------------------------------------------------------
my ($opt, $usage) = describe_options(
    '%c %o <cmd> <name>
Commands:
- clear, setup, sync, down, up, status, diff
Options:',
    ['really',    'really do it'],
    ['peer|p=s',  'Peer to the other host'],
    ['verbose|v', 'print commands being run'],
    ['quiet|q',   'suppress most output'],
    ['force|f',   'Force action'],
    ['help|h',    'Show help and exit'],
);

if ($opt->help) {
    print $usage->text;
    exit;
}

# $< is the real uid of this process.
die "$0 must be run as root.\n" unless $< == 0;

# First positional argument selects the do_<cmd> handler defined in this file.
my $cmd = shift;
die "need a command\n" . $usage->text unless $cmd;
my $code = __PACKAGE__->can("do_$cmd");
die "unknown command $cmd\n" . $usage->text unless $code;

# Second positional argument is the filesystem name under tank/.
my $name = shift;
die "need a name\n" . $usage->text unless $name;
die "Unknown filesystem $name" unless zfsexists(0);

# Hold a lock for the lifetime of the process. Fail loudly if the lock file
# cannot be opened/locked: carrying on without it would defeat its purpose.
my $lock = IO::LockedFile->new("/run/lock/zfsrepl.lock", 'w')
    or die "Unable to lock /run/lock/zfsrepl.lock: $!\n";

# Peer comes from the command line, falling back to the stored property.
my $peer   = $opt->peer // zfsget(0, 'zfsrepl:peer');
my $sshcmd = $peer ? "/usr/bin/ssh -c aes128-gcm\@openssh.com $peer" : '';

# Set by runcmd() whenever a destructive command was skipped (no --really).
my $needreally = 0;

my $hostname = `hostname`;
chomp $hostname;

# Process exit status; handlers may override it (e.g. do_status).
my $exitcode = 0;

# run the action
$code->();

say "Run again with --really to make changes" if $needreally;
exit $exitcode;
# do_clear: remove all zfsrepl state (the zfsrepl:primary and zfsrepl:peer
# properties and every zfsrepl_* snapshot) from the local filesystem and, when
# a peer is configured and has the filesystem, from the remote one as well.
# The filesystems themselves are left in place.
sub do_clear () {
    # Ask the peer once; nothing below destroys the remote filesystem, so the
    # answer is still valid for the final message.
    my $remote_exists = ($peer and zfsexists(1));

    my @todo = (0);
    # if there's a remote filesystem, clean that too (but leave the data intact)
    unshift @todo, 1 if $remote_exists;

    for my $remote (@todo) {
        my $type = $remote ? 'remote' : 'local';

        # "inherit" clears a locally-set property.
        if (zfsget($remote, 'zfsrepl:primary')) {
            say "wiping $type zfsrepl:primary" unless $opt->quiet;
            zfscmd($remote, "inherit zfsrepl:primary tank/$name");
        }
        if (zfsget($remote, 'zfsrepl:peer')) {
            say "wiping $type zfsrepl:peer" unless $opt->quiet;
            zfscmd($remote, "inherit zfsrepl:peer tank/$name");
        }

        # Destroy snapshots newest first.
        my %snap = zfssnaps($remote);
        for my $ts (sort { $b cmp $a } keys %snap) {
            # Report the real snapshot name, including the zfsrepl_ prefix.
            say "Removing $type snapshot tank/$name\@zfsrepl_$ts" unless $opt->quiet;
            zfscmd($remote, "destroy tank/$name\@zfsrepl_$ts");
        }

        say "zfsrepl data removed on " . ($remote ? $peer : $hostname);
    }

    say "Filesystem tank/$name still exists on $peer" if $remote_exists;
}
# do_setup: initialise replication of tank/$name to the peer.
# Refuses to run if any zfsrepl state already exists locally, or if the peer
# already has the filesystem (unless --force, in which case the remote copy is
# destroyed first). Takes an initial snapshot, sends it in full, then records
# the peer and marks this side as primary.
sub do_setup () {
    # Preconditions: no leftover replication state on this side.
    if (zfsget(0, 'zfsrepl:primary')) {
        die "Already have zfsrepl:primary";
    }
    if (zfsget(0, 'zfsrepl:peer')) {
        die "Already have zfsrepl:peer";
    }
    my %existing = zfssnaps(0);
    die "Already have local snapshots" if %existing;
    die "Need a peer" unless $peer;

    # A pre-existing remote filesystem is only replaced when forced.
    # NOTE(review): plain "destroy" is issued without -r; confirm this is
    # sufficient when the remote filesystem has snapshots.
    if (zfsexists(1)) {
        $opt->force or die "Peer filesystem already exists";
        say "WIPING REMOTE FILESYSTEM tank/$name (--force)";
        zfscmd(1, "destroy tank/$name");
    }

    # The snapshot is named after the current ISO-8601 timestamp.
    my $stamp = DateTime->now->iso8601;
    say "Creating first snapshot $stamp" unless $opt->quiet;
    zfscmd(0, "snapshot tank/$name\@zfsrepl_$stamp");

    say "Sending snapshot";
    zfsinit($stamp);

    say "Setting local peer to $peer";
    zfsset(0, 'zfsrepl:peer',    $peer);
    zfsset(0, 'zfsrepl:primary', 'yes');
    say "Replication for tank/$name set up to $peer";
}
# do_sync: push pending changes to the peer, but only when this side is the
# primary. Dies if there is no peer or the primary flag cannot be read.
sub do_sync () {
    $peer or die "Need peer to sync";

    my $primary = zfsget(0, 'zfsrepl:primary');
    $primary or die "Unable to read primary status";

    if ($primary ne 'yes') {
        say "Not syncing, not primary" unless $opt->quiet;
        return;
    }

    # force here means - create a new snapshot even if nothing has changed since last one
    zfssync($opt->force);
}
# do_down: step down from primary. Makes the local filesystem read-only,
# performs a final sync to the peer, then sets zfsrepl:primary to 'no'.
# Does nothing (beyond a message) if this side is already not primary.
sub do_down () {
    $peer or die "Need peer to sync";

    my $primary = zfsget(0, 'zfsrepl:primary');
    $primary or die "Unable to read primary status";

    if ($primary eq 'no') {
        say "Already not primary on $hostname tank/$name" unless $opt->quiet;
        return;
    }

    my $readonly = zfsget(0, 'readonly');
    unless ($readonly eq 'on') {
        say "Becoming read-only" unless $opt->quiet;
        zfsset(0, 'readonly', 'on');
    }

    # sync data after becoming readonly to ensure no diff remains
    zfssync($opt->force);

    zfsset(0, 'zfsrepl:primary', 'no');
    unless ($opt->quiet) {
        say "Primary shut down tank/$name, safe to bring up here or on $peer";
    }
}
# do_up: promote this side to primary.
# Requires the peer to report zfsrepl:primary=no and readonly=on before the
# local filesystem is made read-write and flagged as primary. Does nothing
# (beyond a message) if this side is already primary.
# Dies if the local primary flag is unreadable, if no peer is known, or if the
# peer is not in the expected replica state.
sub do_up () {
    my $primary = zfsget(0, 'zfsrepl:primary');
    die "Unable to read primary status" unless $primary;
    if ($primary eq 'yes') {
        say "Already primary on $hostname tank/$name" unless $opt->quiet;
        return;
    }

    # The checks below query the peer; fail with a clear message up front
    # rather than from inside runcmd().
    die "Need a peer" unless $peer;

    # make sure the OTHER end is in replica mode
    my $other = zfsget(1, 'zfsrepl:primary') // 'unknown';
    die "Other side has primary state $other" if $other ne 'no';

    # make sure other side is readonly
    $other = zfsget(1, 'readonly') // 'unknown';
    die "Other side has readonly status $other" if $other ne 'on';

    if (zfsget(0, 'readonly') eq 'on') {
        say "Becoming read-write" unless $opt->quiet;
        zfsset(0, 'readonly', 'off');
    }
    zfsset(0, 'zfsrepl:primary', 'yes');
    say "$hostname is primary for tank/$name" unless $opt->quiet;
}
# do_status: report primary state through the exit code only.
# Leaves $exitcode untouched when zfsrepl:primary is 'yes'; otherwise (including
# when the property is unset) sets it to 3.
sub do_status () {
    my $state = zfsget(0, 'zfsrepl:primary') // 'no';
    if ($state ne 'yes') {
        $exitcode = 3;
    }
}
# do_diff: show what has changed on the local filesystem since the most recent
# zfsrepl snapshot, by running "zfs diff" with its output going straight to
# our stdout. Only allowed on the primary.
# Dies if this side is not primary or if there is no zfsrepl snapshot to diff
# against; sets a non-zero exit code if "zfs diff" itself fails.
sub do_diff () {
    my $primary = zfsget(0, 'zfsrepl:primary') // 'no';
    die "Not primary" unless $primary eq 'yes';

    my %local = zfssnaps(0);
    # Timestamps sort lexically, so the first element of the reversed sort is
    # the newest snapshot.
    my ($latest) = sort { $b cmp $a } keys %local;
    die "No zfsrepl snapshots of tank/$name to diff against" unless defined $latest;

    # we use "system" here to print the output directly
    my $status = system('zfs', 'diff', "tank/$name\@zfsrepl_$latest");
    $exitcode = 1 if $status != 0;
    return;
}
# zfssync: bring the peer up to date with the local filesystem.
#   $force - when true, snapshot and send even if nothing was written since
#            the latest shared snapshot.
# Steps: find the newest snapshot present on both sides (dies if none), drop
# snapshots that exist on only one side, then - if there is anything to send -
# take a new snapshot, send the increment, and prune old snapshots.
sub zfssync ($force) {
    my %local  = zfssnaps(0);
    my %remote = zfssnaps(1);

    # make sure we have a shared snapshot!
    my @newest_first = sort { $b cmp $a } keys %local;
    my ($latest) = grep { $remote{$_} } @newest_first;
    unless ($latest) {
        die "No latest snapshot with remote $name";
    }

    # remove any local snapshots that aren't present remotely
    my @local_only = grep { !$remote{$_} } @newest_first;
    for my $ts (@local_only) {
        say "Removing spurious local snapshot $ts" unless $opt->quiet;
        zfscmd(0, "destroy tank/$name\@zfsrepl_$ts");
        delete $local{$ts};
    }

    # remove any remote snapshots that aren't present locally
    my @remote_only = grep { !$local{$_} } keys %remote;
    for my $ts (@remote_only) {
        say "Removing spurious remote snapshot $ts" unless $opt->quiet;
        zfscmd(1, "destroy tank/$name\@zfsrepl_$ts");
        delete $remote{$ts};
    }

    # now we're up to date, are there any bytes of data to sync?
    my $dirty = $force || zfsget(0, "written\@zfsrepl_$latest");
    unless ($dirty) {
        say "No local changes, nothing to sync" if $opt->verbose;
        return;
    }

    # Snapshot names have one-second resolution; wait until the name is free.
    my $stamp = DateTime->now->iso8601;
    while ($local{$stamp}) {
        say "Waiting a second, already have snapshot for $stamp" unless $opt->quiet;
        sleep 1;
        $stamp = DateTime->now->iso8601;
    }

    zfscmd(0, "snapshot tank/$name\@zfsrepl_$stamp");
    zfssend($latest, $stamp);

    # Prune: of the snapshots that existed before this run, keep the newest 4
    # (the one just created is not in this list).
    my %known   = (%local, %remote);
    my @expired = sort { $b cmp $a } keys %known;
    splice @expired, 0, 4;
    for my $ts (@expired) {
        say "Removing old snapshot zfsrepl_$ts" unless $opt->quiet;
        zfscmd(1, "destroy tank/$name\@zfsrepl_$ts") if $remote{$ts};
        zfscmd(0, "destroy tank/$name\@zfsrepl_$ts") if $local{$ts};
    }
}
# runcmd: run a shell command locally or on the peer and return its output.
#   $remote      - true to run over ssh on the peer (dies if no peer is set)
#   $destructive - true if the command changes state; such commands only run
#                  when --really was given
#   $cmd         - the command line
# Returns the chomped output, or nothing when the output is exactly '-'.
# A skipped destructive command sets $needreally and returns 1.
# Dies (via Process::Status) if the command exits unsuccessfully.
sub runcmd ($remote, $destructive, $cmd) {
    if ($remote && !$peer) {
        die "Trying to run a remote command with no peer";
    }
    $cmd = "$sshcmd $cmd" if $remote;

    # Dry run: destructive commands are only reported unless --really is set.
    if ($destructive && !$opt->really) {
        say "WOULD RUN: $cmd" if $opt->verbose;
        return $needreally = 1;
    }

    say "RUNNING: $cmd" if $opt->verbose;
    my $output = `$cmd`;
    Process::Status->assert_ok("$cmd:");
    chomp $output;

    # A lone '-' is treated as "no value".
    return if $output eq '-';

    if ($opt->verbose) {
        for my $line (split /\n/, $output) {
            say "=> $line";
        }
    }
    return $output;
}
# zfsget: read property $item of tank/$name, locally or on the peer ($remote).
# Returns whatever runcmd() returns for the (non-destructive) "zfs get".
sub zfsget ($remote, $item) {
    return runcmd($remote, 0, "zfs get -H -p -o value $item tank/$name");
}
# zfsset: set property $item to $val on tank/$name, locally or on the peer.
# Issued as a destructive command, so it only runs with --really.
sub zfsset ($remote, $item, $val) {
    return runcmd($remote, 1, "zfs set $item=$val tank/$name");
}
# zfsexists: does tank/$name exist on the local host (or on the peer when
# $remote is true)? Lists everything under "tank" and returns the entries
# equal to tank/$name (a count in scalar context).
sub zfsexists ($remote) {
    my $wanted  = "tank/$name";
    my $listing = runcmd($remote, 0, "zfs list -H -p -o name -r tank");
    return grep { $_ eq $wanted } split /\n/, $listing;
}
# zfssnaps: list the zfsrepl snapshots of tank/$name, locally or on the peer.
# Returns a hash (as a flat list) mapping each snapshot timestamp - the part
# after "zfsrepl_" - to 1.
# Dies with "Weird output ..." on any snapshot line that is not of the form
# tank/$name@zfsrepl_<timestamp>.
sub zfssnaps ($remote) {
    my $cmd    = "zfs list -H -p -o name -t snapshot tank/$name";
    my $result = runcmd($remote, 0, $cmd);

    my %res;
    for my $line (split /\n/, $result // '') {
        # \Q...\E: match the filesystem name literally, so characters such as
        # '.' in the name are not treated as regex metacharacters.
        $line =~ m{\Atank/\Q$name\E\@zfsrepl_([0-9T:\-]+)\z}
            or die "Weird output $line";
        $res{$1} = 1;
    }
    return %res;
}
# zfssend: send the incremental stream between snapshots $from and $to of
# tank/$name to the peer, piping "zfs send" into "zfs receive" over ssh.
# Dies if no peer is configured; only runs with --really.
sub zfssend ($from, $to) {
    unless ($peer) {
        die "Trying to run a remote command with no peer";
    }
    my $send    = "zfs send -L -I \@zfsrepl_$from tank/$name\@zfsrepl_$to";
    my $receive = "$sshcmd zfs receive tank/$name";
    return runcmd(0, 1, "$send | $receive");
}
# zfsinit: send the full snapshot zfsrepl_$to of tank/$name to the peer,
# creating the remote filesystem with the local recordsize, a /mnt/$name
# mountpoint, readonly=on, and zfsrepl properties marking it as a non-primary
# whose peer is this host. Dies if no peer is configured; only runs with
# --really.
sub zfsinit ($to) {
    my $recordsize = zfsget(0, 'recordsize');
    unless ($peer) {
        die "Trying to run a remote command with no peer";
    }

    # Properties applied to the received filesystem, in command-line order.
    my @props = (
        "recordsize=$recordsize",
        "mountpoint=/mnt/$name",
        'readonly=on',
        'zfsrepl:primary=no',
        "zfsrepl:peer=$hostname",
    );
    my $receive = join ' ', 'zfs receive', (map { "-o $_" } @props), "tank/$name";

    return runcmd(0, 1, "zfs send -L tank/$name\@zfsrepl_$to | $sshcmd $receive");
}
# zfscmd: run "zfs $todo" locally or on the peer as a destructive command
# (so it only runs with --really).
sub zfscmd ($remote, $todo) {
    return runcmd($remote, 1, "zfs $todo");
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment