Skip to content

Instantly share code, notes, and snippets.

@SmartFinn
Forked from aravindkumarsvg/delete-dupfiles.sh
Last active January 8, 2020 17:44
Show Gist options
  • Save SmartFinn/fd1abebf43fcb9ec66ed0f66131cec03 to your computer and use it in GitHub Desktop.
Save SmartFinn/fd1abebf43fcb9ec66ed0f66131cec03 to your computer and use it in GitHub Desktop.
Replaces duplicate files with hard links to the file that comes first in the sort order
#!/usr/bin/env bash
# set -x # Uncomment to debug this shell script
set -o errexit \
  -o noclobber \
  -o pipefail

# Script name used as the prefix of diagnostics and in the help text.
# Assigned separately from `readonly` so a failing command substitution is
# not masked by the exit status of the `readonly` builtin.
PROGNAME="$(basename "$0")"
readonly PROGNAME
readonly VERSION="0.0.4"

# Option arrays start from whatever the caller placed in the environment and
# are extended while the command line is parsed. Each array is seeded from
# its OWN variable: LN_OPTS must not inherit the options meant for fdupes.
declare -a FDUPES_OPTS=( "${FDUPES_OPTS[@]}" )
declare -a LN_OPTS=( "${LN_OPTS[@]}" )
# 1 = report disk usage before and after deduplication; 0 (default) = do not.
declare -i SHOW_DISK_USAGE="${SHOW_DISK_USAGE:-0}"
# Print a diagnostic on stderr, prefixed with the program name.
# All arguments are joined into one message; backslash escapes in the
# message are interpreted (printf %b).
err() {
  local message="$*"
  printf '%s: %b\n' "${PROGNAME}" "${message}" 1>&2
}
# Print the help text on stdout and terminate the script.
# Arguments: $1 - exit status to terminate with (default: 2)
# The option list includes -v, which the option parser accepts and
# forwards to ln.
usage() {
  cat <<-EOF
This script replaces the duplicate files with hard links.
Usage:
$PROGNAME [options] DIRECTORY...
OPTIONS:
-r for every directory given follow subdirectories
encountered within
-R dir for each directory given after this option follow
subdirectories encountered within
-s follow symlinks
-H remap two and more hard links to one inode
-n exclude zero-length files from consideration
-A exclude hidden files from consideration
-o BY select sort order for output and deleting; by file
modification time (BY='time'; default), status
change time (BY='ctime'), or filename (BY='name')
-i reverse order while sorting
-U show the total size before and after deduplication
-v verbose; pass -v to ln so each created hard link is printed
-V print $PROGNAME version and exit
-h show this help
KNOWN ISSUES:
* unable to deduplicate files that contain a newline in a filename
EOF
  exit "${1:-2}"
}
# Print the combined, human-readable disk usage of the given paths.
# Arguments: one or more files/directories
# Outputs:   a single size string (e.g. "12M") on stdout
#
# With -c, du writes the grand total as its last line, so the size is taken
# from that line instead of matching the word "total": the label can be
# translated in non-English locales, and a path that is itself named "total"
# would otherwise produce a second match.
get_total_size() {
  du -csh -- "$@" | awk '{ size = $1 } END { if (NR) print size }'
}
# Replace every duplicate file with a hard link to the first file of its set.
# Globals:   FDUPES_OPTS (read) - extra options for fdupes
#            LN_OPTS     (read) - extra options for ln
# Arguments: directories to scan, passed through to fdupes
# Returns:   exit status of the fdupes | while pipeline
fdedupes() {
  local dup_file
  local -a dup_files=()

  # fdupes --sameline prints one duplicate set per line: names are separated
  # by a single space, and spaces/backslashes inside a name are
  # backslash-escaped. `read` is deliberately used WITHOUT -r so that it
  # undoes that escaping. IFS is restricted to a space so that a tab inside a
  # file name (which fdupes does not escape) no longer splits the name.
  # shellcheck disable=SC2162
  fdupes --sameline "${FDUPES_OPTS[@]}" -- "$@" |
    while IFS=' ' read -d $'\n' -a dup_files; do
      # Skip blank separator lines and sets whose first file has vanished.
      [ -e "${dup_files[0]}" ] || continue
      for dup_file in "${dup_files[@]:1}"; do
        [ -e "$dup_file" ] || continue
        # Best effort: a failed link must not abort the run under errexit.
        ln -f "${LN_OPTS[@]}" -- "${dup_files[0]}" "$dup_file" || continue
      done
    done
}
# Translate command-line flags into fdupes/ln option arrays and settings,
# then drop the parsed flags so only the directory arguments remain.
while getopts ":AiHnrR:sUo:vVh" opt; do
  case "$opt" in
    [AiHnsr])
      # Plain switches handed to fdupes unchanged.
      FDUPES_OPTS+=("-${opt}")
      ;;
    [Ro])
      # fdupes options that carry an argument.
      FDUPES_OPTS+=("-${opt}" "${OPTARG}")
      ;;
    U)
      SHOW_DISK_USAGE=1
      ;;
    v)
      LN_OPTS+=("-${opt}")
      ;;
    V)
      printf "%s %s\n" "$PROGNAME" "$VERSION"
      exit 0
      ;;
    h)
      usage 0
      ;;
    :)
      err "option requires an argument -- '-$OPTARG'"
      usage
      ;;
    \?)
      err "illegal option -- '-$OPTARG'"
      usage
      ;;
  esac
done
shift "$((OPTIND - 1))"
# At least one directory is required; otherwise report it and show the help.
if [[ -z "$1" ]]; then
  err "no directories specified"
  usage
fi

# Deduplicate; when requested, measure disk usage before and after and
# report the difference.
if [[ "$SHOW_DISK_USAGE" -ne 1 ]]; then
  fdedupes "$@"
else
  total_size_before="$(get_total_size "$@")"
  fdedupes "$@"
  total_size_after="$(get_total_size "$@")"
  printf 'Disk usage has been reduced from %s to %s.\n' \
    "${total_size_before}" "${total_size_after}"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment