Skip to content

Instantly share code, notes, and snippets.

@tanwald
Last active May 9, 2021 16:08
Show Gist options
  • Save tanwald/5dfdee5d27c29630800b40891b6285ee to your computer and use it in GitHub Desktop.
Save tanwald/5dfdee5d27c29630800b40891b6285ee to your computer and use it in GitHub Desktop.
glacier cli - commandline interface for amazon glacier backups
#!/usr/bin/env bash
########################################################################################################################
# USAGE
########################################################################################################################
########################################################################################################################
# USAGE
########################################################################################################################
# prints the command reference and exits the script
function usage() {
    # shared format for one aligned option/description row
    local ROW="%-25s %s\n"
    printf '%s\n' \
        "usage: glacier.sh [backup [init]] [clean [all]] [delete-all] [delete-archive ID] [doctor] [get KEY] " \
        " [help] [last-backup] [list-archives [all]] [list-vaults] [log [all]] [restore-snapshot] " \
        " [set KEY VALUE] [tree-hash ARCHIVE]" \
        " [debug GLACIER_CMD]" \
        " [cli AWS-CLI-CMD]" \
        " " \
        "glacier cli - commandline interface for amazon glacier backups" \
        " " \
        "optional arguments:"
    printf "$ROW" " backup [init]" "backup configured vaults; init: treat all archives as being new"
    printf "$ROW" " clean [all]" "remove temp files; all: reset log/ignore_tmp and remove orphans from inventory"
    printf "$ROW" " delete-all" "delete all archives"
    printf "$ROW" " delete-archive ID" "delete the archive with the given ID"
    printf "$ROW" " doctor" "check preconditions"
    printf "$ROW" " get KEY" "get configuration value for KEY"
    printf "$ROW" " help" "show this message and exit"
    printf "$ROW" " last-backup" "show date and time since last backup"
    printf "$ROW" " list-archives [all]" "list uploaded archives; all: include deleted entries"
    printf "$ROW" " list-vaults" "list vaults stored in inventory ('glacier cli list-vaults' for remote vaults)"
    printf "$ROW" " log [all]" "show log of latest run; all: show all logs"
    printf "$ROW" " restore-snapshot" "restore modification date if it got lost"
    printf "$ROW" " set KEY VALUE" "set configuration KEY to VALUE"
    printf "$ROW" " tree-hash ARCHIVE" "calculate the tree-hash of the given ARCHIVE"
    printf "$ROW" " debug GLACIER-CMD" "run command without aws requests or inventory manipulation"
    printf "$ROW" " cli AWS-CLI-CMD" "run native aws-cli glacier command"
    exit
}
########################################################################################################################
# GLOBALS
########################################################################################################################
# path to the aws cli configuration file; also holds the custom glacier-wrapper keys read by get_config below
CONFIG=~/.aws/config
# retrieves config values from the awscli config file
# (much faster than repeated 'aws configure get' calls;
#  compare: time seq 10 | xargs -I{} aws configure get glacier.aws_access_key_id)
# $1: profile
# $2: key
# returns: config value
function get_config() {
    local PROFILE=$1
    local KEY=$2
    # first isolate the profile's section, then extract the key's value from it
    sed -n "/\[profile $PROFILE\]/,/\[profile/p" $CONFIG | sed -nr "s/$KEY = (.*)/\1/p"
}
# directory where the folders which mirror vaults are stored
GLACIER_VAULTS_ROOT=$(get_config glacier vaults_root)
# directories within the root folder which actually mirror vaults (whitespace separated list)
GLACIER_VAULTS=$(get_config glacier vaults)
# s3 bucket for glacier inventory
GLACIER_S3_BUCKET=$(get_config glacier bucket)
# number of threads for parallel processing; 0 means all available threads
GLACIER_THREADS=$(get_config glacier threads)
GLACIER_THREADS=${GLACIER_THREADS:-0}
# 0 -> use the processor count reported by /proc/cpuinfo (linux only)
GLACIER_THREADS=$(($GLACIER_THREADS == 0 ? $(grep -Ec '^processor' /proc/cpuinfo) : $GLACIER_THREADS))
# number of concurrent uploads
GLACIER_UPLINKS=$(get_config glacier uplinks)
# number of retries for file uploads; exported because upload workers run in 'bash -c' subprocesses
export GLACIER_UPLOAD_RETRY=$(get_config glacier upload_retry)
# folder for logs, local inventory and other data required by the script
export GLACIER_STORE=$GLACIER_VAULTS_ROOT/.glacier
# local glacier vaults inventory (csv: 'vault/archive;archiveId[;DELETED]')
GLACIER_INVENTORY=$GLACIER_STORE/glacier-inventory.csv
# ignore certain new/modified files and folders when searching an archive for changes
GLACIER_IGNORE=$GLACIER_STORE/glacier-ignore
# ignore archives which have already been uploaded during a prior run that completed with errors
GLACIER_IGNORE_TMP=$GLACIER_IGNORE.tmp
# main log file
export GLACIER_LOG=$GLACIER_STORE/glacier.log
# log file of latest run; also used as a failure signal between parallel upload workers
export GLACIER_LOG_LATEST=$GLACIER_STORE/glacier-latest.log
# reference file for finding temporary script-artifacts
GLACIER_SNAPSHOT=$GLACIER_STORE/glacier-snapshot.log
# reference file for finding new archives and for getting the last backup time
GLACIER_SNAPSHOT_SUCCESS=$GLACIER_STORE/glacier-snapshot-success.log
# log file that stores all archives which are to be uploaded
LOG_NEW=$GLACIER_STORE/glacier-new.log
# tree hash calculation requires 1mb chunks of a file; size is given in bytes
PART_SIZE=1048576
# runs aws glacier commands without the need to provide consistent options
# $@: aws glacier subcommand and its arguments
# returns: aws command result, or a canned json response in debug mode
function glacier() {
    if [[ -v DEBUG ]]; then
        # debug mode: emit fake json so downstream jq/--query consumers keep working
        echo '{"archiveId": "debug"}'
    else
        aws glacier "$@" --profile glacier --account-id -
    fi
}
export -f glacier
########################################################################################################################
# OUTPUT
########################################################################################################################
# logs formatted message to stdout and appends it to both log files
# $1: level
# $2: message
# $3/4: newline before/after message [0|1|]
# produces: log entry
# returns: formatted message
function log() {
    local LEVEL=$1
    local MESSAGE=$2
    {
        if [[ "${3:-}" == "1" ]]; then echo; fi
        echo -e "[$LEVEL] $(date +%FT%T.%N): $MESSAGE"
        if [[ "${4:-}" == "1" ]]; then echo; fi
    } | tee -a $GLACIER_LOG $GLACIER_LOG_LATEST
}
export -f log
# logs messages coming from a pipe, one log entry per incoming line
# $1: level
# produces: log entries
# returns: formatted messages
function logx() {
    local LEVEL=$1
    # 'log' is exported, so the bash -c child processes can call it
    xargs -n1 -I{} bash -c "log $LEVEL '{}'"
}
# prints percentage and progress bar based on current/total ratio
# $1: current
# $2: total
# returns: percentage and progress bar (stderr)
PROGRESS_COLS=$(($(tput cols) - 10))
function progress() {
    # clamp current to total so the bar never overshoots 100%
    local COUNT=$(($1 > $2 ? $2 : $1))
    local TOTAL=$2
    local FILLED=$((COUNT * PROGRESS_COLS / TOTAL))
    local BLANK=$((PROGRESS_COLS - FILLED))
    local PCT_INT=$((COUNT * 100 / TOTAL))
    local PCT_FRAC=$(((COUNT * 1000 / TOTAL) % 10))
    {
        # '\r' rewinds to the start of the line so the bar updates in place
        printf "\r %3d.%1d%% [" "$PCT_INT" "$PCT_FRAC"
        printf "%${FILLED}s" '' | tr ' ' '='
        printf "%${BLANK}s" ''
        printf "]"
    } >&2
}
# uses function 'progress' with lines coming from a pipe
# $1: total lines expected
# $2: optional checkpoint string: only lines equal to it count towards progress
# returns: percentage and progress bar
function progress_pipe() {
    local EXPECTED=$1
    local MARKER=$2
    local COUNT=0
    local LINE
    while read -r LINE; do
        if [[ -z "$MARKER" || "$LINE" == "$MARKER" ]]; then
            COUNT=$((COUNT + 1))
        fi
        progress $COUNT $EXPECTED
    done
    # jump straight to 100% - EXPECTED is only an estimate, so COUNT may never reach it
    progress 1 1
    echo >&2
}
########################################################################################################################
# INVENTORY
########################################################################################################################
# retrieves a non-deleted item from the inventory
# $1: archive path and/or id (may start with '-')
# returns: matching inventory line(s): archive path and id
function get_inventory_item() {
    # 'grep -e' keeps a search term with a leading '-' from being parsed as an option;
    # the previous echo|sed roundtrip escaped '-' to '\-' (a no-op in the regex) but
    # left the pattern unquoted, so terms with whitespace or glob chars misbehaved
    grep -e "$1" "$GLACIER_INVENTORY" | sed -r '/DELETED$/d'
}
# deletes an archive from glacier and marks it DELETED in the inventory
# $1: archive path and/or id
# produces: log entries; inventory entry suffixed with ';DELETED' on success
function delete_archive() {
    # inventory line format: vault/archive;12id3...
    local INVENTORY_ITEM=$(get_inventory_item $1)
    local ARCHIVE=${INVENTORY_ITEM%%\;*}
    local ARCHIVE_ID=${INVENTORY_ITEM#*\;}
    local VAULT=${ARCHIVE%%/*}
    log "INFO" "deleting archive $ARCHIVE..."
    # NOTE(review): the literal single quotes around the id look intentional (ids may
    # start with '-') - confirm the aws cli actually expects them
    glacier delete-archive --vault-name $VAULT --archive-id "'$ARCHIVE_ID'" >/dev/null
    # capture the status immediately; the original read $? inside the else branch,
    # where it had already been overwritten by the [[ ]] tests and was always logged as 0
    local STATUS=$?
    if [[ $STATUS -eq 0 ]] || [[ -v DEBUG ]]; then
        # keep the entry but flag it, so 'clean all' can prune it later
        [[ ! -v DEBUG ]] && sed -ri "s/.*;$ARCHIVE_ID\$/\0;DELETED/" $GLACIER_INVENTORY
        log "INFO" "deletion of $ARCHIVE completed"
    else
        log "ERROR" "deletion of archive failed; status code: $STATUS"
    fi
}
########################################################################################################################
# ARCHIVE MANIPULATION
########################################################################################################################
# removes artifacts which were created during the last run(s)
# $1: all flag
# produces: cleans vaults, [removes inventory-entries marked deleted, resets main log and ignored archives]
# returns: removed artifacts and other steps done
function clean() {
# wipe some glacier memories
if [[ "$1" == "all" ]]; then
# reset the main log; this has to be done first to see the log of the clean up
>$GLACIER_LOG && log "INFO" "reset '$GLACIER_LOG'"
# reset temporary ignored archives
>$GLACIER_IGNORE_TMP && log "INFO" "reset '$GLACIER_IGNORE_TMP'"
# clean inventory: drop entries flagged DELETED by delete_archive (keeps a .bak copy)
sed -ri.bak '/DELETED$/d' $GLACIER_INVENTORY && log "INFO" "removed orphans from '$GLACIER_INVENTORY'"
# more verbose message if called manually with 'all'-flag to distinguish from steps before
log "INFO" "cleaning up temporary files..."
else
# short message if called automatically
log "INFO" "cleaning up..."
fi
# remove temporary files of last run: direct children of the vault dirs newer than the
# snapshot taken at backup start (.tar.gz, .part, .hash artifacts)
# NOTE(review): TEMP_FILES is deliberately not 'local' - clean also runs from the
# top-level signal trap; the unquoted expansions assume paths without whitespace
TEMP_FILES=$(find $GLACIER_VAULTS -mindepth 1 -maxdepth 1 -newer $GLACIER_SNAPSHOT)
echo $TEMP_FILES | xargs rm -vf | progress_pipe $(echo $TEMP_FILES | wc -w)
}
trap "clean; exit" INT KILL
# compresses multiple files in parallel
# $1: total number of files
# pipe: list of archives
# produces: compressed archives (folder-name-1.tar.gz [folder-name-2.tar.gz...])
# returns: progress information (only based on count not size)
function targz() {
# -P runs up to $GLACIER_THREADS tar processes concurrently; tar's -v output
# (one line per file added) drives the progress bar, which is why $1 is the total
# FILE count rather than the archive count
xargs -n1 -P$GLACIER_THREADS -I{} tar -vczf {}.tar.gz {} | progress_pipe $1
}
# splits an archive into parts of $PART_SIZE bytes
# $1: path to the archive
# produces: archive-name.tar.gz.aa.part [archive-name.tar.gz.ab.part...]
function splitgz() {
# integer division floors, so TOTAL may undercount by one part;
# progress_pipe compensates by clamping and finishing at 100% explicitly
local TOTAL=$(($(stat -c %s $1) / $PART_SIZE))
split --verbose --bytes=$PART_SIZE --additional-suffix=.part $1 $1. | progress_pipe $TOTAL
}
# searches for new and modified archives
# $1: backup mode ("init" treats every archive as new)
# produces: logfile ($LOG_NEW) containing the archives for upload
# returns: list of archives
function find_new_archives() {
if [[ "$1" == "init" ]]; then
# find all archives (direct children of the vault directories only), minus the
# permanently ($GLACIER_IGNORE) and temporarily ($GLACIER_IGNORE_TMP) ignored ones
find $GLACIER_VAULTS -mindepth 1 -maxdepth 1 |
grep -Evf $GLACIER_IGNORE -f $GLACIER_IGNORE_TMP >$LOG_NEW
else
# find only new and modified archives
# result of modified and new files is reduced to vault/archive; see footnote [1]
# the sed pattern cuts each hit down to 'vault/first-path-component' and skips
# components starting with '.'; uniq collapses duplicates caused by several
# changed files inside the same archive folder
find $GLACIER_VAULTS -newer $GLACIER_SNAPSHOT_SUCCESS |
grep -Ev -f $GLACIER_IGNORE -f $GLACIER_IGNORE_TMP |
sed -rn 's%([^/]+/[^.][^/]+).*%\1%p' |
uniq >$LOG_NEW
fi
}
# compresses new or modified archives and stores them in a log file
# on first run all compressed archives - including existing archives - are stored
# $1: flag to indicate an initial run ("init")
# produces: logfile ($LOG_NEW) containing the compressed archives for upload
# returns: list of compressed archives
function compress_archives() {
    log "INFO" "compressing archives using $GLACIER_THREADS threads..."
    # compress all new files and folders which are not yet compressed
    # BUG FIX: the original filter 'grep -Ev '*.gz'' used glob syntax as a regex and
    # did not exclude the '.gz' suffix - already compressed archives would have been
    # tar'ed a second time; '\.gz$' is the intended pattern
    grep -Ev '\.gz$' $LOG_NEW | targz $(cat $LOG_NEW | xargs find | wc -l)
    if [[ "$1" == "init" ]]; then
        # find all archives
        find $GLACIER_VAULTS -maxdepth 1 -name '*.gz' >$LOG_NEW
    else
        # find only new and modified archives
        find $GLACIER_VAULTS -maxdepth 1 -newer $GLACIER_SNAPSHOT_SUCCESS -name '*.gz' >$LOG_NEW
    fi
}
########################################################################################################################
# DIGEST
########################################################################################################################
# creates a binary sha256 checksum next to the given file
# $1: file
# produces: binary hash-file ($1.hash)
function checksum_binary() {
    # quote the path so file names containing whitespace neither word-split nor
    # trigger an ambiguous redirect
    openssl dgst -sha256 -binary "$1" >"$1.hash"
}
export -f checksum_binary
# creates a hexadecimal sha256 checksum
# $1: file (optional; reads stdin when omitted, e.g. 'cat *.hash | checksum_hex')
# returns: checksum
function checksum_hex() {
    # ${1:+"$1"} quotes the path (whitespace-safe) yet still vanishes completely when
    # no argument is given, which keeps the stdin mode working
    # file names with whitespaces (who knows) would cause wrong output with a simple
    # cut -d' ' -f2, hence rev/cut/rev to take the LAST field
    openssl dgst -sha256 ${1:+"$1"} | rev | cut -d' ' -f1 | rev
}
# creates a tree-hash
# reduces the binary part-hashes ($ARCHIVE*.hash) pairwise: each pair is concatenated
# and re-hashed into the first file of the pair, round after round, until at most two
# files remain; their concatenation hashed once more yields the hex root hash
# $1: archive
# returns: tree-hash
function checksum_tree_hash() {
local ARCHIVE=$1
local TMP_HASH_FILE=$ARCHIVE.hash.tmp
local TREE_HASH=
# rough progress estimate; '+3' pads for the final reduction rounds
local TOTAL=$(($(ls $ARCHIVE*.hash | wc -l) + 3))
local CURRENT=0
while [[ ! $TREE_HASH ]]; do
# ls produces an error if nothing is found; stderr is therefore redirected to a black hole
if [[ $(ls $ARCHIVE*.hash 2>/dev/null | wc -l) -le 2 ]]; then
# two or fewer hashes left: hashing their concatenation gives the root hash
TREE_HASH=$(cat $ARCHIVE*.hash | checksum_hex)
rm -f $ARCHIVE*.hash $TMP_HASH_FILE
fi
# xargs -n2 groups the (lexicographically sorted) hash files into pairs
while read -r PAIR; do
# progress
CURRENT=$(($CURRENT + 1))
progress $CURRENT $TOTAL
# work
local PAIR_ARRAY=($PAIR)
# a lone trailing hash (odd count) has only one element and is carried over
# unchanged into the next round
if [ ${#PAIR_ARRAY[@]} -eq 2 ]; then
# combine the pair and store the result under the first file's name,
# preserving the ordering of the parts
cat $PAIR | openssl dgst -sha256 -binary >$TMP_HASH_FILE
rm -f $PAIR
mv $TMP_HASH_FILE ${PAIR_ARRAY[0]}
fi
done < <(ls $ARCHIVE*.hash 2>/dev/null | xargs -n2 -P$GLACIER_THREADS)
done
# avoid checking if current -eq total (TOTAL is only an estimate); jump to 100%
progress 1 1
echo $TREE_HASH
}
# creates a tree-hash without uploading anything
# $1: archive
# returns: tree-hash (via log output)
function checksum_standalone() {
    local TARGET=$1
    log "INFO" "splitting file $TARGET..."
    splitgz $TARGET
    log "INFO" "creating initial hashes using $GLACIER_THREADS threads..."
    local PARTS
    PARTS=$(ls $TARGET*.part | wc -l)
    # hash every part in parallel; 'DONE' markers feed the progress bar
    ls $TARGET*.part | xargs -n1 -P$GLACIER_THREADS -I{} bash -c "checksum_binary '{}' && echo DONE" | progress_pipe $PARTS
    rm -f $TARGET*.part
    log "INFO" "creating tree hash using $GLACIER_THREADS threads..."
    local ROOT_HASH
    ROOT_HASH=$(checksum_tree_hash $TARGET)
    log "INFO" "tree-hash: $ROOT_HASH"
}
########################################################################################################################
# UPLOAD
########################################################################################################################
# uploads the local inventory file to the configured s3 bucket (skipped in debug mode)
# produces: log entries describing the outcome
function backup_inventory() {
    log "INFO" "backing up inventory to s3://${GLACIER_S3_BUCKET}/..."
    local STATUS=0
    if [[ ! -v DEBUG ]]; then
        aws s3 cp $GLACIER_INVENTORY s3://${GLACIER_S3_BUCKET}/ --profile glacier >/dev/null
        # capture the status immediately; the original read $? after further [[ ]]
        # tests had overwritten it, so the logged status code was always wrong
        STATUS=$?
    fi
    if [[ $STATUS -eq 0 ]]; then
        log "INFO" "upload of inventory completed"
    else
        log "ERROR" "upload of inventory failed; status code: $STATUS"
    fi
}
# aborts a multipart-upload
# $1: vault name
# $2: upload id
function abort_multipart() {
    local VAULT=$1
    local UPLOAD_ID=$2
    glacier abort-multipart-upload --vault-name $VAULT --upload-id $UPLOAD_ID >/dev/null
}
# initiates a multipart-upload
# $1: vault name
# $2: archive name
# returns: upload id (stdout); empty/invalid on failure
function initiate_multipart() {
    local VAULT=$1
    local ARCHIVE_NAME=$2
    # BUG FIX: 'local VAR=$(cmd | tr ...)' always leaves $?=0 - both 'local' and the
    # pipe mask the aws exit status, so the error branch below was dead code.
    # capture the raw response and its status first, then strip the quotes separately.
    local RESPONSE
    RESPONSE=$(glacier initiate-multipart-upload \
        --vault-name $VAULT \
        --archive-description $ARCHIVE_NAME \
        --part-size $PART_SIZE \
        --query "uploadId")
    local STATUS=$?
    local UPLOAD_ID=$(tr -d '"' <<<"$RESPONSE")
    if [[ $STATUS -ne 0 ]]; then
        log "ERROR" "initiation failed; upload id: $UPLOAD_ID; status code: $STATUS"
        # abort if aws returned a (partial) id despite the failure
        [[ $UPLOAD_ID =~ [[:alnum:]] ]] && log "INFO" "aborting..." && abort_multipart $VAULT $UPLOAD_ID
    fi
    echo $UPLOAD_ID
}
# completes a multipart-upload
# $1: vault name
# $2: upload id
# $3: archive size
# $4: tree-hash
# returns: archive id (stdout); empty on failure
function complete_multipart() {
    # BUG FIX: capture the response and its status separately - 'local VAR=$(cmd | tr)'
    # masked the aws exit code (via 'local' and the pipe), so failures went undetected
    local RESPONSE
    RESPONSE=$(glacier complete-multipart-upload \
        --vault-name $1 \
        --upload-id $2 \
        --archive-size $3 \
        --checksum $4 \
        --query "archiveId")
    local STATUS=$?
    local ARCHIVE_ID=$(tr -d '"' <<<"$RESPONSE")
    if [[ $STATUS -ne 0 ]]; then
        log "ERROR" "completion failed; status code: $STATUS; aborting..."
        abort_multipart $1 $2
    fi
    echo $ARCHIVE_ID
}
# uploads a single part of a larger archive
# runs inside 'bash -c' workers spawned by xargs (see upload_multipart), hence the
# export below and the reliance on exported functions (glacier, log, checksum_binary)
# and exported env vars
# $1: vault name
# $2: part path
# $3: upload id
# $4: start of the byte range within the archive
# $5: end of the byte range within archive
# produces: binary hash-files (part-name.hash); removes the part on success
function process_multipart_part() {
local VAULT=$1
local PART=$2
local UPLOAD_ID=$3
local START=$4
local END=$5
local RETRY=0
local STATUS=1
# upload-multipart-part
while [[ $STATUS -ne 0 ]] && [[ $RETRY -le $GLACIER_UPLOAD_RETRY ]]; do
# other part failed. breaking up...
# the latest logfile doubles as the failure channel between the parallel workers
grep -Eq "ERROR.*$UPLOAD_ID" $GLACIER_LOG_LATEST && break
glacier upload-multipart-part \
--vault-name $VAULT \
--upload-id $UPLOAD_ID \
--body $PART \
--range "bytes ${START}-${END}/*" &>/dev/null
STATUS=$?
if [[ $STATUS -ne 0 ]]; then
# aws cli returns only 255 on error; other codes might mean an interrupt or the like
[[ $STATUS -ne 255 ]] && break
# maybe a timeout...
log "WARNING" "upload of $PART failed; retry: $RETRY" >/dev/null # logfile only
RETRY=$(($RETRY + 1))
# have a break before retry: 1s for early retries, RETRY^2 seconds later on
sleep $(($RETRY < $GLACIER_UPLOAD_RETRY / 2 ? 1 : $RETRY * $RETRY))s
fi
done
# create initial checksum; needed for the tree-hash regardless of the upload outcome
checksum_binary $PART
if [[ $STATUS -eq 0 ]]; then
rm -f $PART
elif ! grep -Eq "ERROR.*$UPLOAD_ID" $GLACIER_LOG_LATEST; then
# log only the first error per upload id; the other workers look for it and stop
log "ERROR" "upload of $PART for id $UPLOAD_ID failed; status code: $STATUS" >/dev/null
fi
}
export -f process_multipart_part
# uploads all parts of a multipart-upload-part
# streams "<vault> <part> <upload-id> <start> <end>" lines into xargs, which runs
# process_multipart_part with $GLACIER_UPLINKS concurrent workers
# $1: vault name
# $2: archive
# $3: archive size
# $4: upload id
# returns: status code (echoed: 0 on success, 1 if any worker logged an ERROR)
function upload_multipart() {
local VAULT=$1
local ARCHIVE=$2
local ARCHIVE_SIZE=$3
local UPLOAD_ID=$4
local TOTAL=$(ls $ARCHIVE*.part | wc -l)
local CURRENT=0
# show progress bar before first file is uploaded
progress 0 1
{
for PART in $(ls $ARCHIVE*.part); do
# other part failed. breaking up...
grep -Eq "ERROR.*$UPLOAD_ID" $GLACIER_LOG_LATEST && break
# calculate byte range and increment index
local START=$(($CURRENT * $PART_SIZE))
local CURRENT=$(($CURRENT + 1))
local END=$(($CURRENT * $PART_SIZE - 1))
# the last part is usually shorter: clamp the range end to the archive size
END=$(($END > $ARCHIVE_SIZE - 1 ? $ARCHIVE_SIZE - 1 : $END))
echo "$VAULT $PART $UPLOAD_ID $START $END"
done
# each worker echoes DONE on completion; progress_pipe counts only those markers
} | xargs -I{} -P$GLACIER_UPLINKS bash -c "process_multipart_part {} && echo DONE" | progress_pipe $TOTAL "DONE"
# the logfile is the only reliable failure signal across the worker subshells
{ grep -Eq "ERROR.*$UPLOAD_ID" $GLACIER_LOG_LATEST && echo 1; } || echo 0
}
# uploads a single archive file which is smaller then 1mb, retrying on aws errors
# $1: vault name
# $2: archive
# $3: archive description
# $4: checksum
# returns: archive id (stdout); empty on failure
function upload_single() {
    local RETRY=0
    local STATUS=1
    local ARCHIVE_ID=
    local RESPONSE
    while [[ $STATUS -ne 0 ]] && [[ $RETRY -le $GLACIER_UPLOAD_RETRY ]]; do
        # BUG FIX: the original assigned 'ARCHIVE_ID=$(aws ... | jq | tr)' and read $?
        # afterwards - that reflects the LAST pipeline stage (tr), so a failed aws call
        # still reported success and the retry loop never triggered; the trailing
        # '&>/dev/null' on the assignment also redirected nothing useful.
        # capture the raw response and its status first, then extract the id.
        RESPONSE=$(glacier upload-archive \
            --vault-name $1 \
            --body $2 \
            --archive-description $3 \
            --checksum $4 2>/dev/null)
        STATUS=$?
        if [[ $STATUS -eq 0 ]]; then
            ARCHIVE_ID=$(jq .archiveId <<<"$RESPONSE" | tr -d '"')
        else
            # aws cli returns only 255 on error; other codes might mean an interrupt or the like
            [[ $STATUS -ne 255 ]] && break
            # maybe a timeout...
            log "WARNING" "upload of $2 failed; retry: $RETRY" >/dev/null # logfile only
            RETRY=$(($RETRY + 1))
            # have a break before retry
            sleep $(($RETRY < $GLACIER_UPLOAD_RETRY / 2 ? 1 : $RETRY * $RETRY))s
            ARCHIVE_ID=
        fi
    done
    echo $ARCHIVE_ID
}
# deletes modified archives, splits large archives and triggers the upload of new and modified archives
# reads the archives to process from $LOG_NEW (one 'vault/name.tar.gz' path per line)
# produces: entries in inventory on successful upload
function upload() {
local TOTAL=$(cat $LOG_NEW | wc -l)
local CURRENT=0
# loop over new archives
while IFS= read -r ARCHIVE; do
local VAULT=${ARCHIVE%%/*}
local ARCHIVE_NAME=${ARCHIVE##*/}
local ARCHIVE_SIZE=$(stat -c %s $ARCHIVE)
CURRENT=$(($CURRENT + 1))
local ARCHIVE_ID=
local UPLOAD_ID=
local STATUS=1
# bold headline on the terminal for each archive
echo -e "\033[1m[$CURRENT/$TOTAL ${ARCHIVE%%.*}]\033[0m"
log "INFO" "processing archive $ARCHIVE [$CURRENT/$TOTAL]"
# a modified archive is re-uploaded: remove the outdated remote copy first
if [[ $(get_inventory_item $ARCHIVE) =~ [[:alnum:]] ]]; then
log "INFO" "archive already exists in inventory"
delete_archive $ARCHIVE
fi
if [[ $ARCHIVE_SIZE -le $PART_SIZE ]]; then
# single part upload
log "INFO" "initiating single-file-upload"
log "INFO" "creating checksum..."
local CHECKSUM=$(checksum_hex $ARCHIVE)
log "INFO" "checksum: $CHECKSUM"
log "INFO" "uploading archive..."
ARCHIVE_ID=$(upload_single $VAULT $ARCHIVE $ARCHIVE_NAME $CHECKSUM)
else
# multipart upload
log "INFO" "splitting archive..."
splitgz $ARCHIVE
log "INFO" "initiating multi-part-upload..."
UPLOAD_ID=$(initiate_multipart $VAULT $ARCHIVE_NAME)
if [[ $UPLOAD_ID =~ [[:alnum:]] ]]; then
log "INFO" "starting multi-part-upload using $GLACIER_UPLINKS uplinks; upload id: $UPLOAD_ID"
STATUS=$(upload_multipart $VAULT $ARCHIVE $ARCHIVE_SIZE $UPLOAD_ID)
if [[ $STATUS -eq 0 ]] || [[ -v DEBUG ]]; then
log "INFO" "creating tree hash using $GLACIER_THREADS threads..."
local TREE_HASH=$(checksum_tree_hash $ARCHIVE)
log "INFO" "tree-hash: $TREE_HASH"
log "INFO" "completing upload..."
ARCHIVE_ID=$(complete_multipart $VAULT $UPLOAD_ID $ARCHIVE_SIZE $TREE_HASH)
else
log "ERROR" "multipart upload failed; status code: $STATUS; aborting..."
abort_multipart $VAULT $UPLOAD_ID
fi
fi
fi
if [[ $ARCHIVE_ID =~ [[:alnum:]] ]]; then
if [[ ! -v DEBUG ]]; then
# store in inventory and ignore temporarily
# ${ARCHIVE%%.*} strips from the first dot, i.e. records the uncompressed name
echo "$ARCHIVE;$ARCHIVE_ID" >>$GLACIER_INVENTORY
echo "${ARCHIVE%%.*}" >>$GLACIER_IGNORE_TMP
fi
log "INFO" "upload of $ARCHIVE completed; item stored in inventory"
else
log "ERROR" "upload of $ARCHIVE failed"
fi
done <$LOG_NEW
}
########################################################################################################################
# BACKUP
########################################################################################################################
# searches new or modified archives, compresses them and initiates the upload process
# $1: backup mode ("init" or empty for "default")
# produces: file containing the archives for upload
function backup() {
local BACKUP_MODE=$1
[[ -z "$BACKUP_MODE" ]] && BACKUP_MODE="default"
log "INFO" "starting backup [$BACKUP_MODE]"
# mark the start of the run; clean() later removes artifacts newer than this snapshot
touch $GLACIER_SNAPSHOT
# search start
log "INFO" "searching new archives..."
find_new_archives "$BACKUP_MODE"
# search end
# compression start
local COUNT_NEW=$(cat $LOG_NEW | wc -l)
if [[ $COUNT_NEW -gt 0 ]]; then
log "INFO" "$COUNT_NEW new archives:"
# show the candidates with their sizes before asking for confirmation
cat $LOG_NEW | sort | xargs du -csh | logx "INFO"
else
log "INFO" "no new archives found; exit"
exit 0
fi
echo && read -p "is this ok? [y/N]: " PROCEED && echo
if [[ "$PROCEED" == "y" ]]; then
compress_archives "$BACKUP_MODE"
else
log "INFO" "operation aborted"
exit 0
fi
# compression end
# upload start
# compress_archives has rewritten $LOG_NEW to contain the .gz files for upload
log "INFO" "$(cat $LOG_NEW | wc -l) archives for upload:"
cat $LOG_NEW | sort | xargs du -csh | logx "INFO"
echo && read -p "is this ok? [y/N]: " PROCEED && echo
if [[ "$PROCEED" == "y" ]]; then
log "INFO" "starting upload"
# upload all files stored in $LOG_NEW
upload
# always backup inventory
backup_inventory
if ! grep -q ERROR $GLACIER_LOG_LATEST; then
# reset ignored archives and create a snapshot; date is added to restore modification date if it got lost
[[ ! -v DEBUG ]] && >$GLACIER_IGNORE_TMP && date >$GLACIER_SNAPSHOT_SUCCESS
log "INFO" "backup completed" 0 1
else
log "WARNING" "backup completed with errors"
grep ERROR $GLACIER_LOG_LATEST
log "INFO" "successfully uploaded archives will be excluded until a backup completes without errors"
log "INFO" "include those archives by resetting $GLACIER_IGNORE_TMP; for example by 'glacier clean all'"
echo && read -p "retry failed uploads? [y/N]: " RETRY && echo
if [[ "$RETRY" == "y" ]]; then
clean
# re-invoke this script to retry; archives in the tmp-ignore file are skipped
$0 backup $BACKUP_MODE
else
log "INFO" "operation aborted; successful uploads are stored in $GLACIER_IGNORE_TMP"
fi
fi
else
log "INFO" "operation aborted"
fi
#upload end
}
########################################################################################################################
# DOCTOR
########################################################################################################################
# checks preconditions: required tools, configuration keys, vault directories,
# ignore lists and s3/glacier connectivity
function doctor() {
    # packages
    echo -e "\033[1m[packages]\033[0m"
    # BUG FIX: declare and assign separately - 'local X=$(cmd)' returns the status of
    # 'local' (always 0), so the [NOT FOUND] branches below were unreachable
    local AWS
    AWS=$(aws --version 2>&1)
    { [[ $? -eq 0 ]] && echo "aws cli version: $AWS"; } || echo "aws cli version: [NOT FOUND]"
    local JQ
    JQ=$(jq --version 2>&1)
    { [[ $? -eq 0 ]] && echo "jq version: $JQ"; } || echo "jq version: [NOT FOUND]"
    # config keys
    local AWS_KEYS="aws_access_key_id aws_secret_access_key region"
    local GLACIER_WRAPPER_KEYS="vaults vaults_root threads uplinks upload_retry bucket"
    # aws cli config
    echo -e "\033[1m[aws cli config]\033[0m"
    for KEY in $AWS_KEYS $GLACIER_WRAPPER_KEYS; do
        echo "$KEY: \"$(aws configure get $KEY --profile glacier)\"" | sed 's/""/[NOT FOUND]/'
    done
    # additional wrapper config
    echo -e "\033[1m[glacier wrapper config]\033[0m"
    for KEY in $GLACIER_WRAPPER_KEYS; do
        echo "$KEY: \"$(get_config glacier $KEY)\"" | sed 's/""/[NOT FOUND]/'
    done
    # vault directories
    for DIR in $GLACIER_VAULTS; do
        DIR=$GLACIER_VAULTS_ROOT/$DIR
        { [[ -d $DIR ]] && echo "$DIR: exists"; } || echo "$DIR: [NOT FOUND]"
    done
    # ignored
    echo -e "\033[1m[ignored files and folders]\033[0m"
    { [[ $(cat $GLACIER_IGNORE | wc -l) -gt 0 ]] && cat $GLACIER_IGNORE; } || echo "none"
    echo -e "\033[1m[ignored archives]\033[0m"
    { [[ $(cat $GLACIER_IGNORE_TMP | wc -l) -gt 0 ]] && cat $GLACIER_IGNORE_TMP; } || echo "none"
    # network
    echo -e "\033[1m[network]\033[0m"
    echo "response of request 'aws s3 ls s3://${GLACIER_S3_BUCKET}/ --profile glacier:"
    aws s3 ls s3://${GLACIER_S3_BUCKET}/ --profile glacier
    echo "response of request 'glacier list-vaults:'"
    glacier list-vaults
}
########################################################################################################################
# MAIN
########################################################################################################################
# change to the directory containing folders which represent vaults
cd $GLACIER_VAULTS_ROOT
# set debug mode - or not
# DEBUG is exported as an (empty) marker variable; functions test for it with [[ -v DEBUG ]]
[[ "$1" == "debug" ]] && shift && export DEBUG=
########################################################################################################################
# USAGE
########################################################################################################################
if [[ "$1" == "help" ]] || [[ "$1" == "" ]]; then
usage
########################################################################################################################
# BACKUP
########################################################################################################################
elif [[ "$1" == "backup" ]]; then
[[ -z "$2" ]] || [[ "$2" == "init" ]] || usage
# truncate the per-run logfile
>$GLACIER_LOG_LATEST
# a non-empty tmp-ignore file means a previous run did not complete cleanly
if [[ $(cat $GLACIER_IGNORE_TMP | wc -l) -gt 0 ]]; then
log "WARNING" "local and remote vaults are inconsistent because of one or more incomplete backup runs"
log "WARNING" "archives already uploaded during the incomplete runs are excluded until the next complete backup"
log "INFO" "include those archives by resetting $GLACIER_IGNORE_TMP; for example by 'glacier clean all'"
echo && read -p "do you still want to proceed? [y/N]: " PROCEED && echo
if [[ "$PROCEED" != "y" ]]; then
exit 0
fi
fi
backup $2
# always clean up
clean
########################################################################################################################
# DOCTOR
########################################################################################################################
elif [[ "$1" == "doctor" ]]; then
doctor
########################################################################################################################
# CLEAN
########################################################################################################################
elif [[ "$1" == "clean" ]]; then
[[ "$2" == "" ]] || [[ "$2" == "all" ]] || usage
>$GLACIER_LOG_LATEST
clean $2
########################################################################################################################
# VIEW LOG
########################################################################################################################
elif [[ "$1" == "log" ]]; then
# optional filter pattern: "$2$3" covers both 'log PATTERN' and 'log all PATTERN'
FILTER="$2$3"
FILTER=${FILTER#all}
# '$' (end of line) matches every line, i.e. no filtering
[[ -n "$FILTER" ]] || FILTER="$"
if [[ "$2" == "all" ]]; then
grep -i "$FILTER" $GLACIER_LOG
else
grep -i "$FILTER" $GLACIER_LOG_LATEST
fi
########################################################################################################################
# LIST ARCHIVES
########################################################################################################################
elif [[ "$1" == "list-archives" ]]; then
[[ "$2" == "" ]] || [[ "$2" == "all" ]] || usage
if [[ "$2" == "all" ]]; then
cat $GLACIER_INVENTORY | sort | column -ts ';'
else
cat $GLACIER_INVENTORY | sed -r '/DELETED$/d' | sort | column -ts ';'
fi
########################################################################################################################
# LIST VAULTS
########################################################################################################################
elif [[ "$1" == "list-vaults" ]]; then
# vault name = first path component of each (non-deleted) inventory entry
cat $GLACIER_INVENTORY | sed -r '/DELETED$/d' | sed -r 's/^([^/]+)[/].*/\1/' | sort -u
########################################################################################################################
# LAST BACKUP
########################################################################################################################
elif [[ "$1" == "last-backup" ]]; then
# the mtime of the success-snapshot marks the last completed backup; 86400 seconds per day
LAST=$(stat -c %y $GLACIER_SNAPSHOT_SUCCESS | cut -d'.' -f1)
echo "$((($(date +'%s') - $(date -d "$LAST" +'%s')) / 86400)) days ago ($LAST)"
########################################################################################################################
# RESTORE SNAPSHOT
########################################################################################################################
elif [[ "$1" == "restore-snapshot" ]]; then
>$GLACIER_LOG_LATEST
# the file's own content holds the date it was written (see backup); reapply it as mtime
LAST=$(cat $GLACIER_SNAPSHOT_SUCCESS)
echo $LAST | xargs -I{} touch -d '{}' $GLACIER_SNAPSHOT_SUCCESS && log "INFO" "restored modification date to $LAST"
########################################################################################################################
# DELETE SINGLE
########################################################################################################################
elif [[ "$1" == "delete-archive" ]]; then
[[ -n "$2" ]] || usage
>$GLACIER_LOG_LATEST
log "INFO" "archive to delete: $(get_inventory_item $2 | sed -r 's/^([^;]+);.*/\1/')"
echo && read -p "is this ok? [y/N]: " PROCEED && echo
if [[ "$PROCEED" == "y" ]]; then
delete_archive $2
backup_inventory
else
log "INFO" "operation aborted"
fi
########################################################################################################################
# DELETE ALL
########################################################################################################################
elif [[ "$1" == "delete-all" ]]; then
>$GLACIER_LOG_LATEST
log "INFO" "archives to delete:"
cat $GLACIER_INVENTORY | sed -r '/DELETED$/d' | sort | logx "INFO"
echo && read -p "is this ok? [y/N]: " PROCEED && echo
if [[ "$PROCEED" == "y" ]]; then
for ARCHIVE in $(cat $GLACIER_INVENTORY | sed -r '/DELETED$/d' | sort); do
delete_archive $ARCHIVE
done
backup_inventory
else
log "INFO" "operation aborted"
fi
########################################################################################################################
# TREE HASH
########################################################################################################################
elif [[ "$1" == "tree-hash" ]]; then
[[ -n "$2" ]] || usage
>$GLACIER_LOG_LATEST
cd - >/dev/null # can be done in any directory
checksum_standalone $2
########################################################################################################################
# GET/SET CONFIG
########################################################################################################################
elif [[ "$1" == "get" ]]; then
[[ -n "$2" ]] || usage
get_config glacier $2
elif [[ "$1" == "set" ]]; then
{ [[ -n "$2" ]] && [[ -n "$3" ]]; } || usage
aws configure set "$2" "$3" --profile=glacier
########################################################################################################################
# GLACIER CMD
########################################################################################################################
elif [[ "$1" == "cli" ]]; then
cd - >/dev/null # can be done in any directory
shift
# NOTE(review): BASH_ARGV is only populated while the 'extdebug' shell option is on;
# without it this 'help' detection may never trigger - verify
if [[ "$BASH_ARGV" == "help" ]]; then
# last argument is 'help'; cannot use function 'glacier' as it includes possibly interfering options
aws glacier "$@"
else
glacier "$@"
fi
########################################################################################################################
# USAGE (AGAIN)
########################################################################################################################
else
usage
fi
########################################################################################################################
# FOOTNOTES
########################################################################################################################
# [1]
# it initially also finds new files and folders deep down the hierarchy and then cuts the path at level 2
# vault/new-folder1/new-file
# vault/old-folder/old-folder
# vault/new-folder2/directory/new-directory/new-file1
# vault/new-folder2/directory/new-directory/new-file2
# vault/.new-hidden-folder/...
# vault/new-file
# =>
# vault/new-folder1/new-file
# vault/new-folder2/directory/new-directory/new-file1
# vault/new-folder2/directory/new-directory/new-file2
# vault/.new-hidden-folder/...
# vault/new-file
# =>
# vault/new-folder1/
# vault/new-folder2/
# vault/new-file
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment