Skip to content

Instantly share code, notes, and snippets.

@cathode
Created March 28, 2017 22:09
Show Gist options
  • Save cathode/6f2bdb5c5587da4ac8c7a313c4905c4b to your computer and use it in GitHub Desktop.
Save cathode/6f2bdb5c5587da4ac8c7a313c4905c4b to your computer and use it in GitHub Desktop.
#!/usr/local/bin/python3.4
# ZFS backup to NAS script
import sys
import os
import socket
import subprocess
# bring in custom/shared stuff
import hatools
from shutil import rmtree
from datetime import datetime, timedelta
from subprocess import call
# ==== BEGIN CONFIGURABLES ====
# Local directory on which the NAS backup target gets mounted; every
# stream file and restore script is written underneath it.
targetBase = "/mnt/d2d"
# Remote (host:path) location of the backup share
remoteBase = "pdx-d2d:/nas/zfsdata"
# ZFS datasets processed when none are named on the command line
# (handled recursively, so child datasets are included)
datasets = ["zroot"]
# Prefix identifying snapshots that belong to this backup script
snapPrefix = "backup-"
# Snapshot name used for the baseline (full) backup
snapBaseName = "backup-base"
# File extension given to zfs send streams stored on the NAS target
fileExt = ".zstream"
# Location of the zfs executable
zfs = "/sbin/zfs"
# ==== END CONFIGURABLES ====
# Subcommand in effect unless the command line selects another one
subcommand = "backup"
# Backups may only run on the primary cluster node: on the secondary,
# snapshots can only be created by the replication process.
ha = hatools.HaCluster()
isPrimary = ha.GetCurrentRole() == hatools.RolePrimary
if not isPrimary:
    # Not an error condition, so leave with the default (zero) exit status.
    print(":NOTICE: Current node is not primary, exiting.")
    sys.exit()
# Lock file recording the PID of the backup process that is currently running
pidpath = '/var/run/zbackup.pid'
def pidExists(pid):
    """Return True if a process with the given PID currently exists.

    pid may be an int or a string holding a decimal number. Anything that
    cannot be parsed as an integer, and any value <= 0, is reported as
    "no such process" (False) instead of raising.
    """
    try:
        pid = int(pid)
    except (TypeError, ValueError):
        # Garbage (e.g. a corrupted lock file) cannot name a live process.
        return False
    if pid <= 0:
        # For kill(2), PID 0 and negative PIDs address process groups rather
        # than one process; probing 0 would always "succeed" against our own
        # group and wrongly report a running instance.
        return False
    try:
        # Signal 0 performs the existence/permission check without
        # delivering any signal.
        os.kill(pid, 0)
    except ProcessLookupError:
        return False
    except PermissionError:
        # The process exists but belongs to someone we may not signal.
        return True
    return True
# Single-instance guard: refuse to start while another copy of this
# script is still alive.
# Adapted from:
# https://shoaibmir.wordpress.com/2009/12/14/pid-lock-file-in-python/
lockPresent = os.access(pidpath, os.F_OK)
if lockPresent:
    # A lock file was left behind; read back the PID it records.
    with open(pidpath, "r") as pidfile:
        oldPid = pidfile.read().replace('\n', '')
    if oldPid == '':
        # An empty lock file is mapped to a PID that can never be alive.
        oldPid = '-1'
    if not pidExists(oldPid):
        # Stale lock from a process that is gone: discard it.
        os.remove(pidpath)
    else:
        print("Existing backup process already running (PID " + oldPid + ")")
        sys.exit(1)
# Claim the lock by recording our own PID.
with open(pidpath, "w") as pidfile:
    pidfile.write(str(os.getpid()))
# Execution time tracking
start = datetime.now()
print(":DEBUG: == Begin process @ " + start.strftime("%Y-%m-%d %H:%M:%S") + " ==")
# Command line parsing: argv[1] selects the subcommand, any further
# arguments replace the default list of datasets.
if len(sys.argv) > 1:
    arg = str(sys.argv[1]).lower()
    # 'initialize' is accepted as a long form of 'init'. Every other word is
    # passed through unchanged, so that an unrecognised subcommand reaches
    # the per-dataset dispatch and is reported there as unknown instead of
    # silently running a backup.
    subcommandAliases = {'initialize': 'init'}
    subcommand = subcommandAliases.get(arg, arg)
    # Are there additional arguments (datasets specified)
    if len(sys.argv) > 2:
        # Replace the contents in place so the existing list object is reused.
        datasets[:] = sys.argv[2:]
    print(":: Performing " + subcommand + " on datasets: " + ", ".join(datasets))
else:
    # NOTE: only a message is printed here; execution continues with
    # whatever value `subcommand` held before this point.
    print(":ERROR: No command specified.")
# Make sure the local mount point exists (parents are created as needed)
os.makedirs(targetBase, exist_ok=True)
# Each host gets its own subdirectory on the remote share
remotePath = remoteBase + '/' + socket.gethostname()
# mount target path
print(":: Mounting remote " + remotePath + " at local: " + targetBase)
mountStatus = call(['mount', '-w', '-t', 'nfs', remotePath, targetBase])
# Never continue without the share in place: otherwise the stream files
# would be written into the bare local directory. Checking the mount point
# itself (rather than only the exit status) also accepts a share that was
# still mounted from an earlier run.
if not os.path.ismount(os.path.realpath(targetBase)):
    print(":ERROR: Unable to mount " + remotePath + " at " + targetBase
          + " (mount exited with status " + str(mountStatus) + "), exiting.")
    # Release the single-instance lock before bailing out (best effort).
    try:
        os.remove(pidpath)
    except FileNotFoundError:
        pass
    sys.exit(1)
def _purgeBackupSnapshots(ds):
    """Destroy every direct snapshot of `ds` whose name starts with snapPrefix."""
    p = subprocess.Popen([zfs, 'list', '-p', '-H', '-o', 'name', '-t', 'snapshot', '-d', '1', '-r', ds], stdout=subprocess.PIPE)
    out = p.communicate()[0].decode('utf-8').splitlines()
    wanted = ds + "@" + snapPrefix
    for s in out:
        if s.startswith(wanted):
            call([zfs, 'destroy', '-rv', s])


def _writeStream(sendArgs, bufferMem, outPath):
    """Run `zfs send <sendArgs> | mbuffer -s 512k -m <bufferMem> -o <outPath>`.

    The two processes are connected directly, without a shell, so snapshot
    and file names are passed verbatim as single arguments.

    Returns True only if both processes exited with status 0. On failure a
    stream file that did not exist before the call is removed again, so a
    partial stream is never mistaken for a finished backup.
    """
    existedBefore = os.path.exists(outPath)
    sender = subprocess.Popen([zfs, 'send'] + sendArgs, stdout=subprocess.PIPE)
    writer = None
    try:
        writer = subprocess.Popen(['mbuffer', '-s', '512k', '-m', bufferMem, '-o', outPath], stdin=sender.stdout)
    except OSError as err:
        print(":ERROR: Unable to start mbuffer: " + str(err))
    finally:
        # Drop our copy of the pipe so zfs send receives SIGPIPE (instead of
        # blocking forever) if the writer is missing or exits early.
        sender.stdout.close()
    writerStatus = writer.wait() if writer is not None else 1
    senderStatus = sender.wait()
    succeeded = senderStatus == 0 and writerStatus == 0
    if not succeeded and not existedBefore and os.path.isfile(outPath):
        os.remove(outPath)
    return succeeded


# iterate over each dataset to perform operation on
for ds in datasets:
    # Path where the backup is saved
    # e.g. /mnt/d2d/tank
    targetPath = os.path.join(targetBase, ds)
    # Path where the rotated backups are held
    rotatePath = targetPath + "_previous"
    # Path where the restore script is written
    scriptPath = os.path.join(targetPath, "restore-" + ds.replace('/', '_') + ".sh")
    if subcommand == 'init':
        print(":: Initializing " + ds)
        # Create the necessary subfolders in the backup location
        if not os.path.isdir(targetPath):
            print(":: Creating " + targetPath)
            os.makedirs(targetPath, 0o770, True)
        # Create the restore script template
        if not os.path.isfile(scriptPath):
            print(":: Creating empty restore script: " + scriptPath)
            with open(scriptPath, "w") as scriptFile:
                scriptFile.writelines(["#!/bin/sh\n", "\n", '# Dataset name to restore to may be passed as a parameter:\n', 'destfs=${1:-' + ds + '}\n'])
            os.chmod(scriptPath, 0o750)
    elif subcommand == 'clear':
        print(":: Purging existing backup-related snapshots for " + ds + ".")
        _purgeBackupSnapshots(ds)
    elif subcommand == 'rotate':
        print(":: Rotating backup for " + ds)
        # Check if the backup has been initialized
        if not os.path.isfile(scriptPath):
            print(":WARNING: '" + ds + "' is not initialized. Backup rotation skipped.")
            continue
        # Sanity check -- ensure that we aren't rotating an initialized yet
        # empty current backup
        items = [f for f in os.listdir(targetPath) if str(f).endswith(fileExt)]
        if not items:
            print(":WARNING: No backups have yet been made for '" + ds + "'. Backup rotation skipped.")
            continue
        # Drop the old previous directory
        if os.path.isdir(rotatePath):
            rmtree(rotatePath)
        # Move the current target dir to the rotation dir
        os.rename(targetPath, rotatePath)
        # Purge existing snapshots for backups
        print(":: Purging existing backup-related snapshots for " + ds + ".")
        _purgeBackupSnapshots(ds)
        print(":: Moved " + targetPath + " to " + rotatePath + ", please reinitialize before running another backup.")
    elif subcommand == 'backup':
        print(":: Backing up " + ds + " to " + targetPath)
        # The restore script is the marker for an initialized target (the
        # same test 'rotate' uses). Without it the append below would fail
        # or produce a script lacking its header.
        if not os.path.isfile(scriptPath):
            print(":WARNING: '" + ds + "' is not initialized. Backup skipped.")
            continue
        # List this dataset's snapshots, oldest first, together with the
        # backup state recorded in the ws:backup_state user property.
        p = subprocess.Popen([zfs, 'list', '-p', '-H', '-o', 'name,creation,ws:backup_state', '-s', 'creation', '-t', 'snapshot', '-d', '1', '-r', ds], stdout=subprocess.PIPE)
        out = p.communicate()[0].decode('utf-8').splitlines()
        # Each entry: [snapshot name, creation time, backup state]
        snaps = [[y[0], datetime.fromtimestamp(float(y[1])), y[2]] for y in [x.split('\t') for x in out] if y[0].startswith(ds + "@" + snapPrefix)]
        snapNames = [s[0] for s in snaps]
        # build the name of the 'baseline' snapshot for this dataset
        fullsnap = ds + "@" + snapBaseName
        baseFile = os.path.join(targetPath, fullsnap.replace("/", "_") + fileExt)
        idx = -1
        backupType = 'full'
        # Check if a base snapshot has been taken
        if fullsnap in snapNames:
            idx = snapNames.index(fullsnap)
            # Has the full backup been written out?
            if os.path.isfile(baseFile):
                backupType = 'incremental'
        else:
            # Take snapshot to use for backup baseline
            call([zfs, 'snapshot', '-r', fullsnap])
        if backupType == 'full':
            print(":: Performing full backup to " + baseFile)
            # Mark snapshot backup state
            call([zfs, 'set', 'ws:backup_state=inprogress', fullsnap])
            if _writeStream(['-Re', fullsnap], '256M', baseFile):
                # Only a stream that was written completely is added to the
                # restore script and marked as complete.
                with open(scriptPath, "a") as scriptFile:
                    scriptFile.writelines(["#Restore base:\n", "zfs recv -Fv $destfs < " + baseFile + "\n"])
                call([zfs, 'set', 'ws:backup_state=complete', fullsnap])
            else:
                print(":ERROR: Full backup of " + ds + " failed; " + fullsnap + " left in state 'inprogress'.")
        else:
            print(":: Performing incremental backup.")
            base = snaps[idx]
            # Snapshots newer than the baseline that have no backup state yet
            filtered = [s for s in snaps if s[1] > base[1] and s[2] == '-']
            # Snapshots newer than the baseline that are already backed up
            completed = [s for s in snaps if s[1] > base[1] and s[2] == 'complete']
            # Send from the most recent completed snapshot, or from the
            # baseline when no incremental has completed yet.
            oldsnap = completed[-1][0] if completed else base[0]
            if filtered:
                # Pick the oldest existing snapshot that hasn't been backed up
                newsnap = filtered[0][0]
            else:
                # take a snapshot
                newsnap = ds + "@" + snapPrefix + datetime.now().strftime('%Y.%m.%d-%H.%M')
                if newsnap == oldsnap:
                    # The name only has minute resolution; a second run within
                    # the same minute has nothing new to send.
                    print(":NOTICE: " + newsnap + " has already been backed up. Backup skipped.")
                    continue
                call([zfs, 'snapshot', '-r', newsnap])
            incFilePath = (oldsnap + "_to_" + newsnap + fileExt).replace('/', '_')
            incfile = os.path.join(targetPath, incFilePath)
            # set backup state before starting
            call([zfs, 'set', 'ws:backup_state=inprogress', newsnap])
            if _writeStream(['-Re', '-I', oldsnap, newsnap], '1G', incfile):
                # write 'restore script' entry and mark the snapshot complete
                # only after the backup file has been written out
                with open(scriptPath, "a") as scriptFile:
                    scriptFile.writelines(["# Restore incremental:\n", "zfs recv -Fv $destfs < " + incfile + "\n"])
                call([zfs, 'set', 'ws:backup_state=complete', newsnap])
            else:
                print(":ERROR: Incremental backup of " + ds + " failed; " + newsnap + " left in state 'inprogress'.")
    else:
        print(":ERROR: Unknown subcommand - " + subcommand)
# Detach the NAS share again
print(":: Unmounting remote")
call(['umount', targetBase])
# Release the single-instance lock
os.remove(pidpath)
# Report when the run finished and how long it took
end = datetime.now()
elapsed = str(end - start)
endStamp = end.strftime("%Y-%m-%d %H:%M:%S")
print("".join([":DEBUG: Backup process complete @ ", endStamp, " (elapsed: ", elapsed, ") =="]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment