Skip to content

Instantly share code, notes, and snippets.

@brong
Created December 22, 2024 22:53
Show Gist options
  • Save brong/6a23fee1480f2d62b8a18ade5aea667a to your computer and use it in GitHub Desktop.
Save brong/6a23fee1480f2d62b8a18ade5aea667a to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
#######
# A command line tool for zfs replication at Fastmail
#
# TODO: add more error state handling (e.g. remote down)
# TODO: add force-failover-here mode
# TODO: add force-reset-remote mode (split brain recovery)
#
# This script uses the following two get/set keys on the filesystem:
# zfsrepl:primary (boolean: 'yes' or 'no')
# zfsrepl:peer (hostname of remote server)
#
# As well as protecting the filesystem by using 'readonly' (boolean: 'off' or 'on')
#
# We create snapshots of the form: zfsrepl_(timestamp); of which we keep the most recent 5
#
# The 'sync' command will only create a new snapshot if there is a delta between the most
# recent snapshot and the current state of the filesystem.
#
use v5.20;
use feature qw(signatures);
use Getopt::Long::Descriptive;
use Process::Status;
use IO::LockedFile;
use DateTime;
# ---- option parsing --------------------------------------------------------
my ($opt, $usage) = describe_options(
    '%c %o <cmd> <name>
Commands:
- clear, setup, sync, down, up, status, diff
Options:',
    ['really', 'really do it'],
    ['peer|p=s', 'Peer to the other host'],
    ['verbose|v', 'print commands being run' ],
    ['quiet|q', 'suppress most output' ],
    ['force|f', 'Force action'],
    ['help|h', 'Show help and exit'],
);
print($usage->text), exit if $opt->help;
die "$0 must be run as root.\n" unless $< == 0;

# ---- positional arguments: <cmd> <name> ------------------------------------
my $cmd = shift;
die "need a command\n" . $usage->text unless $cmd;

# Each command is implemented by a do_<cmd> sub defined further down.
my $code = __PACKAGE__->can("do_$cmd");
die "unknown command $cmd\n" . $usage->text unless $code;

my $name = shift;
die "need a name\n" . $usage->text unless $name;
die "Unknown filesystem $name" unless zfsexists(0);

# ---- shared state used by the subs below -----------------------------------
# The lock handle lives in a file-scoped lexical, so it is held until exit.
# A false return means the lock file could not be opened/locked; refuse to
# continue rather than run unserialised against another zfsrepl instance.
my $lock = IO::LockedFile->new("/run/lock/zfsrepl.lock", 'w')
    or die "Unable to obtain lock /run/lock/zfsrepl.lock: $!\n";

# --peer overrides whatever peer is recorded on the filesystem itself.
my $peer = $opt->peer // zfsget(0, 'zfsrepl:peer');
my $sshcmd = $peer ? "/usr/bin/ssh -c aes128-gcm\@openssh.com $peer" : '';

# Set by runcmd() whenever a destructive command was skipped (no --really).
my $needreally = 0;

my $hostname = `hostname`;
chomp $hostname;

# Commands may override this (do_status uses 3 for "not primary").
my $exitcode = 0;

# run the action
$code->();
say "Run again with --really to make changes" if $needreally;
exit $exitcode;
# clear: strip all zfsrepl state (the two user properties and every
# zfsrepl_* snapshot) from the local filesystem and, when a peer is known
# and has the filesystem, from the peer as well. The data itself is kept.
sub do_clear () {
    # The remote side, when present, is cleaned before the local side.
    my @sides = ($peer && zfsexists(1)) ? (1, 0) : (0);

    for my $is_remote (@sides) {
        my $where = $is_remote ? 'remote' : 'local';

        # "inherit" drops the locally-set value of each user property.
        for my $prop ('zfsrepl:primary', 'zfsrepl:peer') {
            next unless zfsget($is_remote, $prop);
            say "wiping $where $prop" unless $opt->quiet;
            zfscmd($is_remote, "inherit $prop tank/$name");
        }

        # Destroy replication snapshots, newest first.
        my %snapshots = zfssnaps($is_remote);
        for my $stamp (reverse sort keys %snapshots) {
            say "Removing $where snapshot tank/$name\@$stamp" unless $opt->quiet;
            zfscmd($is_remote, "destroy tank/$name\@zfsrepl_$stamp");
        }

        my $host = $is_remote ? $peer : $hostname;
        say "zfsrepl data removed on " . $host;
    }

    # Remind the operator that the remote copy of the data was left in place.
    if ($peer and zfsexists(1)) {
        say "Filesystem tank/$name still exists on $peer";
    }
}
# setup: start replicating tank/$name to the peer. Refuses to run when any
# zfsrepl state already exists locally; an existing peer filesystem is only
# destroyed when --force is given.
sub do_setup () {
    # Preconditions: no leftover replication state on this side.
    for my $prop ('zfsrepl:primary', 'zfsrepl:peer') {
        die "Already have $prop" if zfsget(0, $prop);
    }
    my %existing = zfssnaps(0);
    die "Already have local snapshots" if %existing;
    die "Need a peer" unless $peer;

    if (zfsexists(1)) {
        $opt->force or die "Peer filesystem already exists";
        say "WIPING REMOTE FILESYSTEM tank/$name (--force)";
        zfscmd(1, "destroy tank/$name");
    }

    # Take the initial snapshot and ship it in full to the peer.
    my $stamp = DateTime->now->iso8601;
    say "Creating first snapshot $stamp" unless $opt->quiet;
    zfscmd(0, "snapshot tank/$name\@zfsrepl_$stamp");
    print "Sending snapshot\n";
    zfsinit($stamp);

    # Record the relationship locally; this side becomes the primary.
    say "Setting local peer to $peer";
    zfsset(0, 'zfsrepl:peer', $peer);
    zfsset(0, 'zfsrepl:primary', 'yes');
    say "Replication for tank/$name set up to $peer";
}
# sync: replicate to the peer, but only when this host is the primary.
sub do_sync () {
    $peer or die "Need peer to sync";

    my $state = zfsget(0, 'zfsrepl:primary');
    $state or die "Unable to read primary status";

    unless ($state eq 'yes') {
        say "Not syncing, not primary" unless $opt->quiet;
        return;
    }

    # --force here means: take a new snapshot even if nothing has changed
    # since the last one.
    zfssync($opt->force);
}
# down: step down from being primary. The filesystem is made read-only,
# synced one last time, and then marked zfsrepl:primary=no.
sub do_down () {
    $peer or die "Need peer to sync";

    my $state = zfsget(0, 'zfsrepl:primary');
    $state or die "Unable to read primary status";

    if ($state eq 'no') {
        say "Already not primary on $hostname tank/$name" unless $opt->quiet;
        return;
    }

    unless (zfsget(0, 'readonly') eq 'on') {
        say "Becoming read-only" unless $opt->quiet;
        zfsset(0, 'readonly', 'on');
    }

    # Sync only after going read-only, so no further delta can appear.
    zfssync($opt->force);
    zfsset(0, 'zfsrepl:primary', 'no');

    say "Primary shut down tank/$name, safe to bring up here or on $peer"
        unless $opt->quiet;
}
# up: promote this host to primary for tank/$name.
#
# Refuses to proceed unless the peer reports zfsrepl:primary=no and
# readonly=on, so that two hosts never accept writes at the same time.
# Dies if the local primary state cannot be read, if no peer is known, or
# if the peer is not in the expected replica state.
sub do_up () {
    my $primary = zfsget(0, 'zfsrepl:primary');
    die "Unable to read primary status" unless $primary;
    if ($primary eq 'yes') {
        say "Already primary on $hostname tank/$name" unless $opt->quiet;
        return;
    }

    # Same explicit guard as do_sync/do_down, instead of relying on the
    # generic failure inside runcmd when the first remote command is issued.
    die "Need peer to check remote state" unless $peer;

    # make sure the OTHER end is in replica mode
    my $other = zfsget(1, 'zfsrepl:primary') // 'unknown';
    die "Other side has primary state $other" if $other ne 'no';

    # make sure other side is readonly
    $other = zfsget(1, 'readonly') // 'unknown';
    die "Other side has readonly status $other" if $other ne 'on';

    if (zfsget(0, 'readonly') eq 'on') {
        say "Becoming read-write" unless $opt->quiet;
        zfsset(0, 'readonly', 'off');
    }
    zfsset(0, 'zfsrepl:primary', 'yes');
    say "$hostname is primary for tank/$name" unless $opt->quiet;
}
# status: report via the process exit code only. The script exits 3 when
# this host is not the primary (an unset property counts as not primary).
sub do_status () {
    my $state = zfsget(0, 'zfsrepl:primary') // 'no';
    $exitcode = 3 if $state ne 'yes';
}
# diff: show what changed on the primary since the newest zfsrepl snapshot.
#
# Dies when this host is not primary, when there is no zfsrepl snapshot to
# compare against, or when "zfs diff" itself fails.
sub do_diff () {
    my $primary = zfsget(0, 'zfsrepl:primary') // 'no';
    die "Not primary" unless $primary eq 'yes';

    my %local = zfssnaps(0);
    my ($latest) = sort { $b cmp $a } keys %local;

    # Without this guard we would run "zfs diff tank/<name>@zfsrepl_" with an
    # empty timestamp and get a confusing error from zfs.
    die "No zfsrepl snapshot of tank/$name to diff against\n"
        unless defined $latest;

    # we use "system" here to print the output directly
    system('zfs', 'diff', "tank/$name\@zfsrepl_$latest");

    # Report a failed diff the same way runcmd reports failed commands,
    # instead of silently exiting 0.
    Process::Status->assert_ok("zfs diff tank/$name\@zfsrepl_$latest:");
}
# Bring the peer up to date with the local filesystem.
#
#   $force - when true, take and send a new snapshot even if nothing was
#            written since the newest snapshot shared with the peer.
#
# Dies when the two sides have no zfsrepl snapshot in common.
sub zfssync ($force) {
    my %local  = zfssnaps(0);
    my %remote = zfssnaps(1);

    # Newest local snapshot that the peer also has: the incremental base.
    my @newest_first = sort { $b cmp $a } keys %local;
    my $latest;
    for my $stamp (@newest_first) {
        next unless $remote{$stamp};
        $latest = $stamp;
        last;
    }
    die "No latest snapshot with remote $name" unless $latest;

    # Drop local snapshots the peer does not know about ...
    my @local_only = grep { not $remote{$_} } @newest_first;
    for my $bogus (@local_only) {
        say "Removing spurious local snapshot $bogus" unless $opt->quiet;
        zfscmd(0, "destroy tank/$name\@zfsrepl_$bogus");
        delete $local{$bogus};
    }

    # ... and remote snapshots that do not exist locally.
    my @remote_only = grep { not $local{$_} } keys %remote;
    for my $bogus (@remote_only) {
        say "Removing spurious remote snapshot $bogus" unless $opt->quiet;
        zfscmd(1, "destroy tank/$name\@zfsrepl_$bogus");
        delete $remote{$bogus};
    }

    # Both sides now agree. Is there anything new to send?
    unless ($force or zfsget(0, "written\@zfsrepl_$latest")) {
        say "No local changes, nothing to sync" if $opt->verbose;
        return;
    }

    # Timestamps have one-second resolution; wait until we get a fresh one.
    my $now;
    while (1) {
        $now = DateTime->now->iso8601;
        last unless $local{$now};
        say "Waiting a second, already have snapshot for $now" unless $opt->quiet;
        sleep 1;
    }

    zfscmd(0, "snapshot tank/$name\@zfsrepl_$now");
    zfssend($latest, $now);

    # Of the snapshots that existed before this run, keep the newest 4 and
    # expire the rest on whichever side still has them.
    my %known = (%local, %remote);
    my @expired = sort { $b cmp $a } keys %known;
    splice(@expired, 0, 4);
    for my $stamp (@expired) {
        say "Removing old snapshot zfsrepl_$stamp" unless $opt->quiet;
        zfscmd(1, "destroy tank/$name\@zfsrepl_$stamp") if $remote{$stamp};
        zfscmd(0, "destroy tank/$name\@zfsrepl_$stamp") if $local{$stamp};
    }
}
# Run a shell command locally or, when $remote is true, on the peer via ssh.
#
#   $remote      - true to prefix the command with the ssh invocation
#   $destructive - true for commands that change state; these are only
#                  executed when --really was given
#   $cmd         - the command line to run
#
# Returns the chomped output, or nothing when the output is exactly '-'
# (what "zfs get" prints for an unset value). A skipped destructive command
# sets $needreally instead of running. Dies if the command exits non-zero
# or if a remote command is requested while no peer is known.
sub runcmd ($remote, $destructive, $cmd) {
    die "Trying to run a remote command with no peer" if ($remote and not $peer);

    my $full = $remote ? "$sshcmd $cmd" : $cmd;

    # Dry-run path: remember that --really is needed and do nothing else.
    if ($destructive and not $opt->really) {
        print "WOULD RUN: $full\n" if $opt->verbose;
        return $needreally = 1;
    }

    say "RUNNING: $full" if $opt->verbose;
    my $output = `$full`;
    Process::Status->assert_ok("$full:");
    chomp($output);

    return if $output eq '-';

    if ($opt->verbose) {
        for my $line (split /\n/, $output) {
            say "=> $line";
        }
    }
    return $output;
}
# Read one property ($item) of tank/$name, locally or on the peer.
# Returns the raw value, or nothing when zfs reports '-'.
sub zfsget ($remote, $item) {
    return runcmd($remote, 0, "zfs get -H -p -o value $item tank/$name");
}
# Set property $item to $val on tank/$name, locally or on the peer.
# Counts as destructive, so it only runs with --really.
sub zfsset ($remote, $item, $val) {
    return runcmd($remote, 1, "zfs set $item=$val tank/$name");
}
# True when tank/$name shows up in the recursive listing of the "tank"
# pool on the chosen side (local, or the peer when $remote is true).
sub zfsexists ($remote) {
    my $wanted  = "tank/$name";
    my $listing = runcmd($remote, 0, "zfs list -H -p -o name -r tank");
    return grep { $_ eq $wanted } split /\n/, $listing;
}
# List the zfsrepl snapshots of tank/$name on the chosen side.
#
#   $remote - true to query the peer, false for the local host
#
# Returns a hash (as a list) keyed by snapshot timestamp, i.e. the part of
# the snapshot name after "@zfsrepl_"; every value is 1.
# Dies with "Weird output ..." on any listed snapshot that does not follow
# the zfsrepl_<timestamp> naming scheme.
sub zfssnaps ($remote) {
    my $cmd = "zfs list -H -p -o name -t snapshot tank/$name";
    my $result = runcmd($remote, 0, $cmd);
    my @lines = split /\n/, $result;
    my %res;
    for (@lines) {
        # \Q...\E: the filesystem name must match literally; without it a
        # '.' in the name would act as a regex wildcard.
        die "Weird output $_" unless m{^tank/\Q$name\E\@zfsrepl_([0-9T:\-]+)$};
        $res{$1} = 1;
    }
    return %res;
}
# Incrementally send snapshots from zfsrepl_$from up to zfsrepl_$to to the
# peer, piping "zfs send" into "zfs receive" over ssh. Destructive, so it
# only runs with --really. Dies when no peer is known.
sub zfssend ($from, $to) {
    $peer or die "Trying to run a remote command with no peer";

    my $send    = "zfs send -L -I \@zfsrepl_$from tank/$name\@zfsrepl_$to";
    my $receive = "$sshcmd zfs receive tank/$name";
    return runcmd(0, 1, "$send | $receive");
}
# Initial full send of snapshot zfsrepl_$to to the peer. The received
# filesystem is created read-only, as a non-primary replica whose peer is
# this host, with the local recordsize and a mountpoint of /mnt/$name.
# Destructive, so it only runs with --really. Dies when no peer is known.
sub zfsinit ($to) {
    my $recordsize = zfsget(0, 'recordsize');
    $peer or die "Trying to run a remote command with no peer";

    # Properties applied on the receiving side, in this order.
    my @props = (
        "recordsize=$recordsize",
        "mountpoint=/mnt/$name",
        'readonly=on',
        'zfsrepl:primary=no',
        "zfsrepl:peer=$hostname",
    );
    my $receive = join ' ', 'zfs receive', (map { "-o $_" } @props), "tank/$name";

    return runcmd(0, 1, "zfs send -L tank/$name\@zfsrepl_$to | $sshcmd $receive");
}
# Run an arbitrary state-changing "zfs ..." subcommand ($todo), locally or
# on the peer. Always treated as destructive, so it only runs with --really.
sub zfscmd ($remote, $todo) {
    return runcmd($remote, 1, "zfs $todo");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment