Last active
May 9, 2021 16:08
-
-
Save tanwald/5dfdee5d27c29630800b40891b6285ee to your computer and use it in GitHub Desktop.
glacier cli - commandline interface for amazon glacier backups
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
######################################################################################################################## | |
# USAGE | |
######################################################################################################################## | |
# prints the command overview and exits the script
function usage() {
    echo "usage: glacier.sh [backup [init]] [clean [all]] [delete-all] [delete-archive ID] [doctor] [get KEY] "
    echo "       [help] [last-backup] [list-archives [all]] [list-vaults] [log [all]] [restore-snapshot] "
    echo "       [set KEY VALUE] [tree-hash ARCHIVE]"
    echo "       [debug GLACIER_CMD]"
    echo "       [cli AWS-CLI-CMD]"
    echo " "
    echo "glacier cli - commandline interface for amazon glacier backups"
    echo " "
    echo "optional arguments:"
    # each entry is 'label|description'; rendered with a fixed-width label column
    local ENTRY
    local -a ENTRIES=(
        " backup [init]|backup configured vaults; init: treat all archives as being new"
        " clean [all]|remove temp files; all: reset log/ignore_tmp and remove orphans from inventory"
        " delete-all|delete all archives"
        " delete-archive ID|delete the archive with the given ID"
        " doctor|check preconditions"
        " get KEY|get configuration value for KEY"
        " help|show this message and exit"
        " last-backup|show date and time since last backup"
        " list-archives [all]|list uploaded archives; all: include deleted entries"
        " list-vaults|list vaults stored in inventory ('glacier cli list-vaults' for remote vaults)"
        " log [all]|show log of latest run; all: show all logs"
        " restore-snapshot|restore modification date if it got lost"
        " set KEY VALUE|set configuration KEY to VALUE"
        " tree-hash ARCHIVE|calculate the tree-hash of the given ARCHIVE"
        " debug GLACIER-CMD|run command without aws requests or inventory manipulation"
        " cli AWS-CLI-CMD|run native aws-cli glacier command"
    )
    for ENTRY in "${ENTRIES[@]}"; do
        printf "%-25s %s\n" "${ENTRY%%|*}" "${ENTRY#*|}"
    done
    exit
}
######################################################################################################################## | |
# GLOBALS | |
######################################################################################################################## | |
# awscli config file holding both aws profiles and wrapper-specific keys
CONFIG=~/.aws/config
# retrieves config values from the awscli config file
# (benchmark note: time seq 10 | xargs -I{} aws configure get glacier.aws_access_key_id)
# $1: profile
# $2: key
# returns: config value (stdout; empty when profile/key is absent)
function get_config() {
    # first sed: limit to the lines of the requested profile section;
    # second sed: extract the value of 'key = value'
    sed -n "/\[profile $1\]/,/\[profile/p" "$CONFIG" | sed -nr "s/$2 = (.*)/\1/p"
}
# directory where the folders which mirror vaults are stored
GLACIER_VAULTS_ROOT=$(get_config glacier vaults_root)
# directories within the root folder which actually mirror vaults (whitespace-separated list)
GLACIER_VAULTS=$(get_config glacier vaults)
# s3 bucket for glacier inventory
GLACIER_S3_BUCKET=$(get_config glacier bucket)
# number of threads for parallel processing; 0 means all available threads
GLACIER_THREADS=$(get_config glacier threads)
GLACIER_THREADS=${GLACIER_THREADS:-0}
# resolve 0 to the machine's core count; run the cpuinfo lookup only when
# actually needed (the old arithmetic ternary always spawned the grep)
if ((GLACIER_THREADS == 0)); then
    GLACIER_THREADS=$(grep -Ec '^processor' /proc/cpuinfo)
fi
# number of concurrent uploads
GLACIER_UPLINKS=$(get_config glacier uplinks)
# number of retries for file uploads; exported for use in xargs/bash -c workers
# (split assignment and export so the command's exit status is not masked)
GLACIER_UPLOAD_RETRY=$(get_config glacier upload_retry)
export GLACIER_UPLOAD_RETRY
# folder for logs, local inventory and other data required by the script
export GLACIER_STORE=$GLACIER_VAULTS_ROOT/.glacier
# local glacier vaults inventory
GLACIER_INVENTORY=$GLACIER_STORE/glacier-inventory.csv
# ignore certain new/modified files and folders when searching an archive for changes
GLACIER_IGNORE=$GLACIER_STORE/glacier-ignore
# ignore archives which have already been uploaded during a prior run that completed with errors
GLACIER_IGNORE_TMP=$GLACIER_IGNORE.tmp
# main log file
export GLACIER_LOG=$GLACIER_STORE/glacier.log
# log file of latest run
export GLACIER_LOG_LATEST=$GLACIER_STORE/glacier-latest.log
# reference file for finding temporary script-artifacts
GLACIER_SNAPSHOT=$GLACIER_STORE/glacier-snapshot.log
# reference file for finding new archives and for getting the last backup time
GLACIER_SNAPSHOT_SUCCESS=$GLACIER_STORE/glacier-snapshot-success.log
# log file that stores all archives which are to be uploaded
LOG_NEW=$GLACIER_STORE/glacier-new.log
# tree hash calculation requires 1mb chunks of a file; size is given in bytes
PART_SIZE=1048576
# runs aws glacier commands without repeating the shared options on every call
# returns: aws command result, or a canned json answer when DEBUG is set
function glacier() {
    if [[ -v DEBUG ]]; then
        # json format so debug output stays parseable by jq
        echo '{"archiveId": "debug"}'
    else
        aws glacier "$@" --profile glacier --account-id -
    fi
}
export -f glacier
######################################################################################################################## | |
# OUTPUT | |
######################################################################################################################## | |
# logs a formatted message to stdout and to both log files
# $1: level (e.g. INFO, WARNING, ERROR)
# $2: message
# $3/4: newline before/after message [0|1|]
# produces: log entry in $GLACIER_LOG and $GLACIER_LOG_LATEST
# returns: formatted message (stdout)
function log() {
    {
        [[ $3 ]] && [[ $3 -eq 1 ]] && echo
        echo -e "[$1] $(date +%FT%T.%N): $2"
        [[ $4 ]] && [[ $4 -eq 1 ]] && echo
    } | tee -a "$GLACIER_LOG" "$GLACIER_LOG_LATEST"
}
export -f log
# logs messages coming from a pipe, one log entry per input line
# $1: level
# produces: log entries
# returns: formatted messages
function logx() {
    # call log directly instead of spawning 'bash -c' per line; this keeps
    # messages containing quotes intact (the old xargs form broke on ')
    local MSG
    while IFS= read -r MSG; do
        log "$1" "$MSG"
    done
}
# prints percentage and progress bar based on current/total ratio
# $1: current
# $2: total
# returns: percentage and progress bar (stderr)
PROGRESS_COLS=$(($(tput cols) - 10))
function progress() {
    # clamp the counter so overshooting input never exceeds 100%
    local DONE=$(($1 > $2 ? $2 : $1))
    local ALL=$2
    local BAR_FILL=$((DONE * PROGRESS_COLS / ALL))
    local BAR_REST=$((PROGRESS_COLS - BAR_FILL))
    local PCT_INT=$((DONE * 100 / ALL))
    local PCT_FRAC=$(((DONE * 1000 / ALL) % 10))
    printf "\r %3d.%1d%% [" "$PCT_INT" "$PCT_FRAC" >&2
    printf "%${BAR_FILL}s" '' | tr ' ' '=' >&2
    printf "%${BAR_REST}s" '' >&2
    printf "]" >&2
}
# uses function 'progress' with lines coming from a pipe
# $1: total lines expected
# $2: optional checkpoint string (only matching lines advance the counter)
# returns: percentage and progress bar
function progress_pipe() {
    local EXPECTED=$1
    local MARK=$2
    local COUNT=0
    local LINE
    while read -r LINE; do
        if [[ -z "$MARK" || "$LINE" == "$MARK" ]]; then
            COUNT=$((COUNT + 1))
        fi
        # refreshed on every item; throttling (e.g. every 10th) would reduce flicker
        progress "$COUNT" "$EXPECTED"
    done
    # render a full bar without having to compare count against expected
    progress 1 1
    echo >&2
}
######################################################################################################################## | |
# INVENTORY | |
######################################################################################################################## | |
# retrieves an item from inventory
# $1: archive path and/or id
# returns: matching inventory line(s) 'path;id' that are not marked DELETED
function get_inventory_item() {
    # fixed-string search: archive ids contain '-' and other regex
    # metacharacters; '--' keeps leading dashes from being read as options
    # (replaces the old manual '-' escaping plus regex grep)
    grep -F -- "$1" "$GLACIER_INVENTORY" | sed -r '/DELETED$/d'
}
# deletes an archive from glacier and marks it DELETED in the inventory
# $1: archive path and/or id
function delete_archive() {
    # inventory line format: vault/archive;12id3...
    local INVENTORY_ITEM
    INVENTORY_ITEM=$(get_inventory_item "$1")
    local ARCHIVE=${INVENTORY_ITEM%%\;*}
    local ARCHIVE_ID=${INVENTORY_ITEM#*\;}
    local VAULT=${ARCHIVE%%/*}
    log "INFO" "deleting archive $ARCHIVE..."
    glacier delete-archive --vault-name "$VAULT" --archive-id "'$ARCHIVE_ID'" >/dev/null
    # capture the aws exit code immediately; the old code logged a stale '$?'
    # (the status of the [[ ]] tests) in the error branch
    local STATUS=$?
    if [[ $STATUS -eq 0 ]] || [[ -v DEBUG ]]; then
        # mark as deleted instead of removing the line; 'clean all' prunes later
        [[ ! -v DEBUG ]] && sed -ri "s/.*;$ARCHIVE_ID\$/\0;DELETED/" "$GLACIER_INVENTORY"
        log "INFO" "deletion of $ARCHIVE completed"
    else
        log "ERROR" "deletion of archive failed; status code: $STATUS"
    fi
}
######################################################################################################################## | |
# ARCHIVE MANIPULATION | |
######################################################################################################################## | |
# removes artifacts which were created during the last run(s)
# $1: all flag
# produces: cleans vaults, [removes inventory-entries marked deleted, resets main log and ignored archives]
# returns: removed artifacts and other steps done
function clean() {
    # wipe some glacier memories
    if [[ "$1" == "all" ]]; then
        # reset the main log; this has to be done first to see the log of the clean up
        >"$GLACIER_LOG" && log "INFO" "reset '$GLACIER_LOG'"
        # reset temporary ignored archives
        >"$GLACIER_IGNORE_TMP" && log "INFO" "reset '$GLACIER_IGNORE_TMP'"
        # clean inventory (sed keeps a .bak backup)
        sed -ri.bak '/DELETED$/d' "$GLACIER_INVENTORY" && log "INFO" "removed orphans from '$GLACIER_INVENTORY'"
        # more verbose message if called manually with 'all'-flag to distinguish from steps before
        log "INFO" "cleaning up temporary files..."
    else
        # short message if called automatically
        log "INFO" "cleaning up..."
    fi
    # remove temporary files of last run; $GLACIER_VAULTS is a
    # whitespace-separated list and therefore intentionally unquoted
    local TEMP_FILES
    TEMP_FILES=$(find $GLACIER_VAULTS -mindepth 1 -maxdepth 1 -newer "$GLACIER_SNAPSHOT")
    echo "$TEMP_FILES" | xargs rm -vf | progress_pipe "$(echo "$TEMP_FILES" | grep -c .)"
}
# SIGKILL cannot be trapped (the old 'INT KILL' registration was dead);
# TERM is the strongest catchable termination signal
trap "clean; exit" INT TERM
# compresses multiple files in parallel
# $1: total number of files (for the progress display)
# pipe: list of archives
# produces: compressed archives (folder-name-1.tar.gz [folder-name-2.tar.gz...])
# returns: progress information (only based on count not size)
function targz() {
    xargs -n1 -P"$GLACIER_THREADS" -I{} tar -vczf {}.tar.gz {} | progress_pipe "$1"
}
# splits an archive into parts of $PART_SIZE bytes
# $1: path to the archive
# produces: archive-name.tar.gz.aa.part [archive-name.tar.gz.ab.part...]
function splitgz() {
    # ceiling division: files smaller than one part previously yielded a
    # total of 0, which made 'progress' divide by zero
    local TOTAL=$((($(stat -c %s "$1") + PART_SIZE - 1) / PART_SIZE))
    split --verbose --bytes="$PART_SIZE" --additional-suffix=.part "$1" "$1." | progress_pipe "$TOTAL"
}
# searches for new and modified archives
# $1: backup mode ('init' forces a full scan)
# produces: logfile ($LOG_NEW) containing the archives for upload
# returns: list of archives
function find_new_archives() {
    # NB: $GLACIER_VAULTS is a whitespace-separated list and therefore intentionally unquoted
    if [[ "$1" == "init" ]]; then
        # find all archives (every direct child of the vault directories)
        find $GLACIER_VAULTS -mindepth 1 -maxdepth 1 |
            grep -Ev -f "$GLACIER_IGNORE" -f "$GLACIER_IGNORE_TMP" >"$LOG_NEW"
    else
        # find only new and modified archives
        # result of modified and new files is reduced to vault/archive; see footnote [1]
        find $GLACIER_VAULTS -newer "$GLACIER_SNAPSHOT_SUCCESS" |
            grep -Ev -f "$GLACIER_IGNORE" -f "$GLACIER_IGNORE_TMP" |
            sed -rn 's%([^/]+/[^.][^/]+).*%\1%p' |
            uniq >"$LOG_NEW"
    fi
}
# compresses new or modified archives and stores them in a log file
# on first run all compressed archives - including existing archives - are stored
# $1: flag to indicate an initial run ('init')
# produces: logfile ($LOG_NEW) containing the compressed archives for upload
# returns: list of compressed archives
function compress_archives() {
    log "INFO" "compressing archives using $GLACIER_THREADS threads..."
    # compress everything from $LOG_NEW that is not already a .gz archive;
    # the old pattern '*.gz' was not a valid ERE and filtered nothing
    grep -Ev '\.gz$' "$LOG_NEW" | targz "$(xargs find <"$LOG_NEW" | wc -l)"
    if [[ "$1" == "init" ]]; then
        # find all archives
        find $GLACIER_VAULTS -maxdepth 1 -name '*.gz' >"$LOG_NEW"
    else
        # find only new and modified archives
        find $GLACIER_VAULTS -maxdepth 1 -newer "$GLACIER_SNAPSHOT_SUCCESS" -name '*.gz' >"$LOG_NEW"
    fi
}
######################################################################################################################## | |
# DIGEST | |
######################################################################################################################## | |
# creates a binary sha256 checksum next to the file
# $1: file
# produces: binary hash-file ($1.hash)
function checksum_binary() {
    openssl dgst -sha256 -binary "$1" >"$1.hash"
}
export -f checksum_binary
# creates a hexadecimal sha256 checksum
# $1: file (optional; when omitted, data is read from stdin —
#     checksum_tree_hash relies on the stdin form)
# returns: checksum (stdout)
function checksum_hex() {
    # ${1:+"$1"} quotes the filename when present but expands to nothing when
    # absent, so the stdin call path keeps working
    # file names with whitespaces (who knows) would cause wrong output with
    # a simple cut -d' ' -f2, hence the rev|cut|rev to grab the last field
    openssl dgst -sha256 ${1:+"$1"} | rev | cut -d' ' -f1 | rev
}
# creates a tree-hash: adjacent part-hash files are repeatedly paired,
# concatenated and re-hashed until at most two remain, whose combined
# digest becomes the final tree-hash
# $1: archive (part hashes are expected as $1*.hash files)
# returns: tree-hash (stdout); progress is rendered on stderr
function checksum_tree_hash() {
    local ARCHIVE=$1
    local TMP_HASH_FILE=$ARCHIVE.hash.tmp
    local TREE_HASH=
    # +3 presumably pads the total for the final combining steps so the
    # progress bar does not finish early — TODO confirm the exact count
    local TOTAL=$(($(ls $ARCHIVE*.hash | wc -l) + 3))
    local CURRENT=0
    while [[ ! $TREE_HASH ]]; do
        # ls produces an error if nothing is found; stderr is therefore redirected to a black hole
        if [[ $(ls $ARCHIVE*.hash 2>/dev/null | wc -l) -le 2 ]]; then
            # at most two hashes left: their concatenation yields the final digest
            TREE_HASH=$(cat $ARCHIVE*.hash | checksum_hex)
            rm -f $ARCHIVE*.hash $TMP_HASH_FILE
        fi
        # xargs -n2 groups the remaining hash files into pairs (one pair per line)
        while read -r PAIR; do
            # progress
            CURRENT=$(($CURRENT + 1))
            progress $CURRENT $TOTAL
            # work: digest the concatenated pair and let the combined hash
            # replace the first file of the pair
            local PAIR_ARRAY=($PAIR)
            if [ ${#PAIR_ARRAY[@]} -eq 2 ]; then
                cat $PAIR | openssl dgst -sha256 -binary >$TMP_HASH_FILE
                rm -f $PAIR
                mv $TMP_HASH_FILE ${PAIR_ARRAY[0]}
            fi
        done < <(ls $ARCHIVE*.hash 2>/dev/null | xargs -n2 -P$GLACIER_THREADS)
    done
    progress 1 1
    echo $TREE_HASH
}
# creates a tree-hash without uploading anything
# $1: archive
# returns: tree-hash (reported via log output)
function checksum_standalone() {
    local TARGET=$1
    log "INFO" "splitting file $TARGET..."
    splitgz $TARGET
    log "INFO" "creating initial hashes using $GLACIER_THREADS threads..."
    local PART_COUNT
    PART_COUNT=$(ls $TARGET*.part | wc -l)
    # hash all parts in parallel; 'DONE' markers drive the progress bar
    ls $TARGET*.part |
        xargs -n1 -P$GLACIER_THREADS -I{} bash -c "checksum_binary '{}' && echo DONE" |
        progress_pipe $PART_COUNT
    rm -f $TARGET*.part
    log "INFO" "creating tree hash using $GLACIER_THREADS threads..."
    local TREE_HASH
    TREE_HASH=$(checksum_tree_hash $TARGET)
    log "INFO" "tree-hash: $TREE_HASH"
}
######################################################################################################################## | |
# UPLOAD | |
######################################################################################################################## | |
# uploads the local inventory file to the configured s3 bucket
function backup_inventory() {
    log "INFO" "backing up inventory to s3://${GLACIER_S3_BUCKET}/..."
    # capture the aws exit code explicitly; the old code logged a stale '$?'
    # (the status of the [[ ]] tests) in the error branch
    local STATUS=0
    if [[ ! -v DEBUG ]]; then
        aws s3 cp "$GLACIER_INVENTORY" "s3://${GLACIER_S3_BUCKET}/" --profile glacier >/dev/null
        STATUS=$?
    fi
    if [[ $STATUS -eq 0 ]]; then
        log "INFO" "upload of inventory completed"
    else
        log "ERROR" "upload of inventory failed; status code: $STATUS"
    fi
}
# aborts a multipart-upload so no half-finished upload lingers on aws
# $1: vault name
# $2: upload id
function abort_multipart() {
    glacier abort-multipart-upload --vault-name "$1" --upload-id "$2" >/dev/null
}
# initiates a multipart-upload
# $1: vault name
# $2: archive name
# returns: upload id (stdout; may be empty on failure)
function initiate_multipart() {
    local VAULT=$1
    local ARCHIVE_NAME=$2
    # capture the aws exit code before post-processing; the old
    # 'local VAR=$(... | tr ...)' masked it ($? was always tr's status),
    # so the error branch could never trigger
    local RAW STATUS
    RAW=$(glacier initiate-multipart-upload \
        --vault-name "$VAULT" \
        --archive-description "$ARCHIVE_NAME" \
        --part-size "$PART_SIZE" \
        --query "uploadId")
    STATUS=$?
    local UPLOAD_ID
    UPLOAD_ID=$(tr -d '"' <<<"$RAW")
    if [[ $STATUS -ne 0 ]]; then
        log "ERROR" "initiation failed; upload id: $UPLOAD_ID; status code: $STATUS"
        # abort a partially-created upload to avoid lingering server-side state
        [[ $UPLOAD_ID =~ [[:alnum:]] ]] && log "INFO" "aborting..." && abort_multipart "$VAULT" "$UPLOAD_ID"
    fi
    echo "$UPLOAD_ID"
}
# completes a multipart-upload
# $1: vault name
# $2: upload id
# $3: archive size
# $4: tree-hash
# returns: archive id (stdout; may be empty on failure)
function complete_multipart() {
    # capture the aws exit code before post-processing; the old
    # 'local VAR=$(... | tr ...)' masked it, so the abort branch was dead
    local RAW STATUS
    RAW=$(glacier complete-multipart-upload \
        --vault-name "$1" \
        --upload-id "$2" \
        --archive-size "$3" \
        --checksum "$4" \
        --query "archiveId")
    STATUS=$?
    local ARCHIVE_ID
    ARCHIVE_ID=$(tr -d '"' <<<"$RAW")
    if [[ $STATUS -ne 0 ]]; then
        log "ERROR" "completion failed; status code: $STATUS; aborting..."
        abort_multipart "$1" "$2"
    fi
    echo "$ARCHIVE_ID"
}
# uploads a single part of a larger archive (runs inside an xargs/bash -c
# worker, hence the export below)
# $1: vault name
# $2: part path
# $3: upload id
# $4: start of the byte range within the archive
# $5: end of the byte range within the archive
# produces: binary hash-files (part-name.hash); the part file itself is
#           removed on successful upload
function process_multipart_part() {
    local VAULT=$1
    local PART=$2
    local UPLOAD_ID=$3
    local START=$4
    local END=$5
    local RETRY=0
    local STATUS=1
    # upload-multipart-part with a bounded retry loop
    while [[ $STATUS -ne 0 ]] && [[ $RETRY -le $GLACIER_UPLOAD_RETRY ]]; do
        # a sibling part of the same upload already logged an error: bail out early
        grep -Eq "ERROR.*$UPLOAD_ID" $GLACIER_LOG_LATEST && break
        glacier upload-multipart-part \
            --vault-name $VAULT \
            --upload-id $UPLOAD_ID \
            --body $PART \
            --range "bytes ${START}-${END}/*" &>/dev/null
        STATUS=$?
        if [[ $STATUS -ne 0 ]]; then
            # aws cli returns only 255 on error; other codes might mean an interrupt or the like
            [[ $STATUS -ne 255 ]] && break
            # maybe a timeout...
            log "WARNING" "upload of $PART failed; retry: $RETRY" >/dev/null # logfile only
            RETRY=$(($RETRY + 1))
            # have a break before retry: 1s for the first retries, then quadratic backoff
            sleep $(($RETRY < $GLACIER_UPLOAD_RETRY / 2 ? 1 : $RETRY * $RETRY))s
        fi
    done
    # create initial checksum; done unconditionally so the tree-hash inputs exist
    checksum_binary $PART
    if [[ $STATUS -eq 0 ]]; then
        rm -f $PART
    elif ! grep -Eq "ERROR.*$UPLOAD_ID" $GLACIER_LOG_LATEST; then
        # log only the first error per upload id; sibling workers grep for it to stop early
        log "ERROR" "upload of $PART for id $UPLOAD_ID failed; status code: $STATUS" >/dev/null
    fi
}
export -f process_multipart_part
# uploads all parts of a multipart-upload in parallel
# $1: vault name
# $2: archive
# $3: archive size
# $4: upload id
# returns: aggregate status code on stdout (0 on success, 1 if any part
#          logged an error)
function upload_multipart() {
    local VAULT=$1
    local ARCHIVE=$2
    local ARCHIVE_SIZE=$3
    local UPLOAD_ID=$4
    local TOTAL=$(ls $ARCHIVE*.part | wc -l)
    local CURRENT=0
    # show progress bar before first file is uploaded
    progress 0 1
    {
        for PART in $(ls $ARCHIVE*.part); do
            # a part already failed: stop queueing further parts
            grep -Eq "ERROR.*$UPLOAD_ID" $GLACIER_LOG_LATEST && break
            # calculate byte range and increment index
            local START=$(($CURRENT * $PART_SIZE))
            local CURRENT=$(($CURRENT + 1))
            local END=$(($CURRENT * $PART_SIZE - 1))
            # the final part usually ends before a full part boundary
            END=$(($END > $ARCHIVE_SIZE - 1 ? $ARCHIVE_SIZE - 1 : $END))
            # one whitespace-separated argument line per part; the xargs
            # worker below relies on {} being word-split into five arguments
            echo "$VAULT $PART $UPLOAD_ID $START $END"
        done
    } | xargs -I{} -P$GLACIER_UPLINKS bash -c "process_multipart_part {} && echo DONE" | progress_pipe $TOTAL "DONE"
    # the workers only log errors; derive the aggregate status from the log
    { grep -Eq "ERROR.*$UPLOAD_ID" $GLACIER_LOG_LATEST && echo 1; } || echo 0
}
# uploads a single archive file which is smaller than 1mb
# $1: vault name
# $2: archive
# $3: archive description
# $4: checksum
# returns: archive id (stdout; empty on failure)
function upload_single() {
    local RETRY=0
    local STATUS=1
    local ARCHIVE_ID=
    local RAW
    while [[ $STATUS -ne 0 ]] && [[ $RETRY -le $GLACIER_UPLOAD_RETRY ]]; do
        # capture the aws exit code directly; the old pipe into jq/tr made
        # $? reflect the last pipe stage, so failures never triggered a retry
        RAW=$(glacier upload-archive \
            --vault-name "$1" \
            --body "$2" \
            --archive-description "$3" \
            --checksum "$4" 2>/dev/null)
        STATUS=$?
        ARCHIVE_ID=$(echo "$RAW" | jq .archiveId 2>/dev/null | tr -d '"')
        if [[ $STATUS -ne 0 ]]; then
            # aws cli returns only 255 on error; other codes might mean an interrupt or the like
            [[ $STATUS -ne 255 ]] && break
            # maybe a timeout...
            log "WARNING" "upload of $2 failed; retry: $RETRY" >/dev/null # logfile only
            RETRY=$(($RETRY + 1))
            # have a break before retry
            sleep $(($RETRY < $GLACIER_UPLOAD_RETRY / 2 ? 1 : $RETRY * $RETRY))s
            ARCHIVE_ID=
        fi
    done
    echo $ARCHIVE_ID
}
# deletes modified archives, splits large archives and triggers the upload
# of new and modified archives; reads the work list from $LOG_NEW (one
# 'vault/archive' path per line)
# produces: entries in inventory on successful upload
function upload() {
    local TOTAL=$(cat $LOG_NEW | wc -l)
    local CURRENT=0
    # loop over new archives
    while IFS= read -r ARCHIVE; do
        # path layout is vault/name.tar.gz: first component is the vault
        local VAULT=${ARCHIVE%%/*}
        local ARCHIVE_NAME=${ARCHIVE##*/}
        local ARCHIVE_SIZE=$(stat -c %s $ARCHIVE)
        CURRENT=$(($CURRENT + 1))
        local ARCHIVE_ID=
        local UPLOAD_ID=
        local STATUS=1
        # bold console header for the current archive
        echo -e "\033[1m[$CURRENT/$TOTAL ${ARCHIVE%%.*}]\033[0m"
        log "INFO" "processing archive $ARCHIVE [$CURRENT/$TOTAL]"
        if [[ $(get_inventory_item $ARCHIVE) =~ [[:alnum:]] ]]; then
            # modified archive: remove the outdated remote copy first
            log "INFO" "archive already exists in inventory"
            delete_archive $ARCHIVE
        fi
        if [[ $ARCHIVE_SIZE -le $PART_SIZE ]]; then
            # single part upload (archive fits into one part)
            log "INFO" "initiating single-file-upload"
            log "INFO" "creating checksum..."
            local CHECKSUM=$(checksum_hex $ARCHIVE)
            log "INFO" "checksum: $CHECKSUM"
            log "INFO" "uploading archive..."
            ARCHIVE_ID=$(upload_single $VAULT $ARCHIVE $ARCHIVE_NAME $CHECKSUM)
        else
            # multipart upload: split, initiate, upload parts, tree-hash, complete
            log "INFO" "splitting archive..."
            splitgz $ARCHIVE
            log "INFO" "initiating multi-part-upload..."
            UPLOAD_ID=$(initiate_multipart $VAULT $ARCHIVE_NAME)
            if [[ $UPLOAD_ID =~ [[:alnum:]] ]]; then
                log "INFO" "starting multi-part-upload using $GLACIER_UPLINKS uplinks; upload id: $UPLOAD_ID"
                STATUS=$(upload_multipart $VAULT $ARCHIVE $ARCHIVE_SIZE $UPLOAD_ID)
                if [[ $STATUS -eq 0 ]] || [[ -v DEBUG ]]; then
                    log "INFO" "creating tree hash using $GLACIER_THREADS threads..."
                    local TREE_HASH=$(checksum_tree_hash $ARCHIVE)
                    log "INFO" "tree-hash: $TREE_HASH"
                    log "INFO" "completing upload..."
                    ARCHIVE_ID=$(complete_multipart $VAULT $UPLOAD_ID $ARCHIVE_SIZE $TREE_HASH)
                else
                    log "ERROR" "multipart upload failed; status code: $STATUS; aborting..."
                    abort_multipart $VAULT $UPLOAD_ID
                fi
            fi
        fi
        if [[ $ARCHIVE_ID =~ [[:alnum:]] ]]; then
            if [[ ! -v DEBUG ]]; then
                # store in inventory and ignore temporarily (until a backup
                # completes without errors; see backup())
                echo "$ARCHIVE;$ARCHIVE_ID" >>$GLACIER_INVENTORY
                echo "${ARCHIVE%%.*}" >>$GLACIER_IGNORE_TMP
            fi
            log "INFO" "upload of $ARCHIVE completed; item stored in inventory"
        else
            log "ERROR" "upload of $ARCHIVE failed"
        fi
    done <$LOG_NEW
}
######################################################################################################################## | |
# BACKUP | |
######################################################################################################################## | |
# searches new or modified archives, compresses them and initiates the
# upload process; interactive (asks for confirmation before compressing
# and before uploading)
# $1: backup mode ('init' for a full backup, empty for incremental)
# produces: file containing the archives for upload
function backup() {
    local BACKUP_MODE=$1
    [[ -z "$BACKUP_MODE" ]] && BACKUP_MODE="default"
    log "INFO" "starting backup [$BACKUP_MODE]"
    # timestamp marking the start of this run; clean() later removes
    # everything newer than this snapshot
    touch $GLACIER_SNAPSHOT
    # search start
    log "INFO" "searching new archives..."
    find_new_archives "$BACKUP_MODE"
    # search end
    # compression start
    local COUNT_NEW=$(cat $LOG_NEW | wc -l)
    if [[ $COUNT_NEW -gt 0 ]]; then
        log "INFO" "$COUNT_NEW new archives:"
        # show the candidates with their sizes before asking for confirmation
        cat $LOG_NEW | sort | xargs du -csh | logx "INFO"
    else
        log "INFO" "no new archives found; exit"
        exit 0
    fi
    echo && read -p "is this ok? [y/N]: " PROCEED && echo
    if [[ "$PROCEED" == "y" ]]; then
        compress_archives "$BACKUP_MODE"
    else
        log "INFO" "operation aborted"
        exit 0
    fi
    # compression end
    # upload start
    log "INFO" "$(cat $LOG_NEW | wc -l) archives for upload:"
    cat $LOG_NEW | sort | xargs du -csh | logx "INFO"
    echo && read -p "is this ok? [y/N]: " PROCEED && echo
    if [[ "$PROCEED" == "y" ]]; then
        log "INFO" "starting upload"
        # upload all files stored in $LOG_NEW
        upload
        # always backup inventory
        backup_inventory
        if ! grep -q ERROR $GLACIER_LOG_LATEST; then
            # reset ignored archives and create a snapshot; date is added to restore modification date if it got lost
            [[ ! -v DEBUG ]] && >$GLACIER_IGNORE_TMP && date >$GLACIER_SNAPSHOT_SUCCESS
            log "INFO" "backup completed" 0 1
        else
            log "WARNING" "backup completed with errors"
            grep ERROR $GLACIER_LOG_LATEST
            log "INFO" "successfully uploaded archives will be excluded until a backup completes without errors"
            log "INFO" "include those archives by resetting $GLACIER_IGNORE_TMP; for example by 'glacier clean all'"
            echo && read -p "retry failed uploads? [y/N]: " RETRY && echo
            if [[ "$RETRY" == "y" ]]; then
                # remove artifacts, then re-run this script with the same mode;
                # already-uploaded archives are skipped via $GLACIER_IGNORE_TMP
                clean
                $0 backup $BACKUP_MODE
            else
                log "INFO" "operation aborted; successful uploads are stored in $GLACIER_IGNORE_TMP"
            fi
        fi
    else
        log "INFO" "operation aborted"
    fi
    #upload end
}
######################################################################################################################## | |
# DOCTOR | |
######################################################################################################################## | |
# checks preconditions | |
# checks preconditions: required packages, configuration keys, vault
# directories, ignore files and network connectivity (S3 bucket + glacier)
function doctor() {
    # packages
    echo -e "\033[1m[packages]\033[0m"
    # declaration and assignment are split so that $? reflects the command
    # substitution, not the always-successful 'local' builtin (SC2155);
    # with 'local AWS=$(...)' the [NOT FOUND] branch could never trigger
    local AWS
    AWS=$(aws --version 2>&1)
    { [[ $? -eq 0 ]] && echo "aws cli version: $AWS"; } || echo "aws cli version: [NOT FOUND]"
    local JQ
    JQ=$(jq --version 2>&1)
    { [[ $? -eq 0 ]] && echo "jq version: $JQ"; } || echo "jq version: [NOT FOUND]"
    # config keys that must be present in the aws cli 'glacier' profile
    local AWS_KEYS="aws_access_key_id aws_secret_access_key region"
    local GLACIER_WRAPPER_KEYS="vaults vaults_root threads uplinks upload_retry bucket"
    # aws cli config
    echo -e "\033[1m[aws cli config]\033[0m"
    # keys are single space-separated words; intentional word-splitting
    for KEY in $AWS_KEYS $GLACIER_WRAPPER_KEYS; do
        echo "$KEY: \"$(aws configure get "$KEY" --profile glacier)\"" | sed 's/""/[NOT FOUND]/'
    done
    # additional wrapper config
    echo -e "\033[1m[glacier wrapper config]\033[0m"
    for KEY in $GLACIER_WRAPPER_KEYS; do
        echo "$KEY: \"$(get_config glacier "$KEY")\"" | sed 's/""/[NOT FOUND]/'
    done
    # every configured vault must exist as a directory below the vaults root
    for DIR in $GLACIER_VAULTS; do
        DIR=$GLACIER_VAULTS_ROOT/$DIR
        { [[ -d "$DIR" ]] && echo "$DIR: exists"; } || echo "$DIR: [NOT FOUND]"
    done
    # ignored
    echo -e "\033[1m[ignored files and folders]\033[0m"
    { [[ $(wc -l <"$GLACIER_IGNORE") -gt 0 ]] && cat "$GLACIER_IGNORE"; } || echo "none"
    echo -e "\033[1m[ignored archives]\033[0m"
    { [[ $(wc -l <"$GLACIER_IGNORE_TMP") -gt 0 ]] && cat "$GLACIER_IGNORE_TMP"; } || echo "none"
    # network
    echo -e "\033[1m[network]\033[0m"
    # quote fixed: the original message left the inner command unterminated
    echo "response of request 'aws s3 ls s3://${GLACIER_S3_BUCKET}/ --profile glacier':"
    aws s3 ls s3://${GLACIER_S3_BUCKET}/ --profile glacier
    echo "response of request 'glacier list-vaults':"
    glacier list-vaults
}
######################################################################################################################## | |
# MAIN | |
######################################################################################################################## | |
# change to the directory containing folders which represent vaults;
# abort if it is missing — otherwise every subsequent relative path
# (archive discovery, cleanup) would silently operate on the wrong directory
cd "$GLACIER_VAULTS_ROOT" || { echo "ERROR: cannot cd to '$GLACIER_VAULTS_ROOT'" >&2; exit 1; }
# set debug mode - or not; DEBUG is only tested for existence ([[ -v DEBUG ]]),
# so an empty export is sufficient
[[ "$1" == "debug" ]] && shift && export DEBUG=
######################################################################################################################## | |
# USAGE | |
######################################################################################################################## | |
if [[ "$1" == "help" ]] || [[ "$1" == "" ]]; then
    usage
########################################################################################################################
# BACKUP
########################################################################################################################
elif [[ "$1" == "backup" ]]; then
    # only no mode or 'init' is valid
    [[ -z "$2" ]] || [[ "$2" == "init" ]] || usage
    >"$GLACIER_LOG_LATEST"
    # a non-empty ignore_tmp means a previous backup run did not complete
    if [[ $(wc -l <"$GLACIER_IGNORE_TMP") -gt 0 ]]; then
        log "WARNING" "local and remote vaults are inconsistent because of one or more incomplete backup runs"
        log "WARNING" "archives already uploaded during the incomplete runs are excluded until the next complete backup"
        log "INFO" "include those archives by resetting $GLACIER_IGNORE_TMP; for example by 'glacier clean all'"
        echo && read -p "do you still want to proceed? [y/N]: " PROCEED && echo
        if [[ "$PROCEED" != "y" ]]; then
            exit 0
        fi
    fi
    # ${2:+"$2"} forwards the mode only if one was given, preserving arity
    backup ${2:+"$2"}
    # always clean up
    clean
########################################################################################################################
# DOCTOR
########################################################################################################################
elif [[ "$1" == "doctor" ]]; then
    doctor
########################################################################################################################
# CLEAN
########################################################################################################################
elif [[ "$1" == "clean" ]]; then
    [[ "$2" == "" ]] || [[ "$2" == "all" ]] || usage
    >"$GLACIER_LOG_LATEST"
    clean ${2:+"$2"}
########################################################################################################################
# VIEW LOG
########################################################################################################################
elif [[ "$1" == "log" ]]; then
    # optional filter: 'log [all] [FILTER]'; the literal 'all' prefix is
    # stripped so that 'log all foo' filters the full log for 'foo'
    FILTER="$2$3"
    FILTER=${FILTER#all}
    # '$' matches every line, i.e. an empty filter shows everything
    [[ -n "$FILTER" ]] || FILTER="$"
    if [[ "$2" == "all" ]]; then
        grep -i "$FILTER" "$GLACIER_LOG"
    else
        grep -i "$FILTER" "$GLACIER_LOG_LATEST"
    fi
########################################################################################################################
# LIST ARCHIVES
########################################################################################################################
elif [[ "$1" == "list-archives" ]]; then
    [[ "$2" == "" ]] || [[ "$2" == "all" ]] || usage
    if [[ "$2" == "all" ]]; then
        sort "$GLACIER_INVENTORY" | column -ts ';'
    else
        # default view hides entries marked as DELETED
        sed -r '/DELETED$/d' "$GLACIER_INVENTORY" | sort | column -ts ';'
    fi
########################################################################################################################
# LIST VAULTS
########################################################################################################################
elif [[ "$1" == "list-vaults" ]]; then
    # the vault name is the first path component of each inventory entry
    sed -r '/DELETED$/d' "$GLACIER_INVENTORY" | sed -r 's/^([^/]+)[/].*/\1/' | sort -u
########################################################################################################################
# LAST BACKUP
########################################################################################################################
elif [[ "$1" == "last-backup" ]]; then
    # modification time of the snapshot file marks the last successful backup
    LAST=$(stat -c %y "$GLACIER_SNAPSHOT_SUCCESS" | cut -d'.' -f1)
    echo "$((($(date +'%s') - $(date -d "$LAST" +'%s')) / 86400)) days ago ($LAST)"
########################################################################################################################
# RESTORE SNAPSHOT
########################################################################################################################
elif [[ "$1" == "restore-snapshot" ]]; then
    >"$GLACIER_LOG_LATEST"
    # the snapshot file contains the date of the last successful backup;
    # restore it as the file's modification time (direct touch instead of
    # the former echo|xargs round-trip)
    LAST=$(cat "$GLACIER_SNAPSHOT_SUCCESS")
    touch -d "$LAST" "$GLACIER_SNAPSHOT_SUCCESS" && log "INFO" "restored modification date to $LAST"
########################################################################################################################
# DELETE SINGLE
########################################################################################################################
elif [[ "$1" == "delete-archive" ]]; then
    [[ -n "$2" ]] || usage
    >"$GLACIER_LOG_LATEST"
    log "INFO" "archive to delete: $(get_inventory_item "$2" | sed -r 's/^([^;]+);.*/\1/')"
    echo && read -p "is this ok? [y/N]: " PROCEED && echo
    if [[ "$PROCEED" == "y" ]]; then
        delete_archive "$2"
        backup_inventory
    else
        log "INFO" "operation aborted"
    fi
########################################################################################################################
# DELETE ALL
########################################################################################################################
elif [[ "$1" == "delete-all" ]]; then
    >"$GLACIER_LOG_LATEST"
    log "INFO" "archives to delete:"
    sed -r '/DELETED$/d' "$GLACIER_INVENTORY" | sort | logx "INFO"
    echo && read -p "is this ok? [y/N]: " PROCEED && echo
    if [[ "$PROCEED" == "y" ]]; then
        # read line by line instead of word-splitting $(cat ...) so that
        # inventory entries containing spaces stay intact
        while IFS= read -r ARCHIVE; do
            delete_archive "$ARCHIVE"
        done < <(sed -r '/DELETED$/d' "$GLACIER_INVENTORY" | sort)
        backup_inventory
    else
        log "INFO" "operation aborted"
    fi
########################################################################################################################
# TREE HASH
########################################################################################################################
elif [[ "$1" == "tree-hash" ]]; then
    [[ -n "$2" ]] || usage
    >"$GLACIER_LOG_LATEST"
    cd - >/dev/null # can be done in any directory
    checksum_standalone "$2"
########################################################################################################################
# GET/SET CONFIG
########################################################################################################################
elif [[ "$1" == "get" ]]; then
    [[ -n "$2" ]] || usage
    get_config glacier "$2"
elif [[ "$1" == "set" ]]; then
    { [[ -n "$2" ]] && [[ -n "$3" ]]; } || usage
    aws configure set "$2" "$3" --profile=glacier
########################################################################################################################
# GLACIER CMD
########################################################################################################################
elif [[ "$1" == "cli" ]]; then
    cd - >/dev/null # can be done in any directory
    shift
    # "${!#}" expands to the last positional parameter; BASH_ARGV is only
    # populated in extended debugging mode, so the former check never matched
    if [[ "${!#}" == "help" ]]; then
        # last argument is 'help'; cannot use function 'glacier' as it includes possibly interfering options
        aws glacier "$@"
    else
        glacier "$@"
    fi
########################################################################################################################
# USAGE (AGAIN)
########################################################################################################################
else
    usage
fi
######################################################################################################################## | |
# FOOTNOTES | |
######################################################################################################################## | |
# [1] | |
# it initially also finds new files and folders deep down the hierarchy and then cuts the path at level 2 | |
# vault/new-folder1/new-file | |
# vault/old-folder/old-folder | |
# vault/new-folder2/directory/new-directory/new-file1 | |
# vault/new-folder2/directory/new-directory/new-file2 | |
# vault/.new-hidden-folder/... | |
# vault/new-file | |
# => | |
# vault/new-folder1/new-file | |
# vault/new-folder2/directory/new-directory/new-file1 | |
# vault/new-folder2/directory/new-directory/new-file2 | |
# vault/.new-hidden-folder/... | |
# vault/new-file | |
# => | |
# vault/new-folder1/ | |
# vault/new-folder2/ | |
# vault/new-file |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment