-
-
Save SmartFinn/fd1abebf43fcb9ec66ed0f66131cec03 to your computer and use it in GitHub Desktop.
Replaces duplicate files with hard links to the file that comes first in the sort order.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Replace duplicate files (as reported by fdupes) with hard links.
#
# Variables honoured if already set when the script starts:
#   FDUPES_OPTS      seed value for the options passed to fdupes
#   LN_OPTS          seed value for the options passed to ln
#   SHOW_DISK_USAGE  1 to report disk usage before/after (default 0)

# set -x  # Uncomment to debug this shell script

set -o errexit \
    -o noclobber \
    -o pipefail

# Parameter expansion instead of $(basename ...): no subprocess whose failure
# would be hidden behind the 'readonly' builtin's own exit status.
PROGNAME="${0##*/}"
readonly PROGNAME
readonly VERSION="0.0.4"

# Each option array is seeded from its OWN pre-existing value, so options
# meant for fdupes are never handed to ln.
declare -a FDUPES_OPTS=( "${FDUPES_OPTS[@]}" )
declare -a LN_OPTS=( "${LN_OPTS[@]}" )
declare -i SHOW_DISK_USAGE="${SHOW_DISK_USAGE:-0}"
#######################################
# Print a diagnostic, prefixed with the program name, on stderr.
# Globals:   PROGNAME (read)
# Arguments: message words; "$*" joins them with the first character of IFS.
#            Backslash escapes in the message are expanded (printf %b).
# Outputs:   one line on stderr, nothing on stdout
#######################################
err() {
  printf '%s: %b\n' "$PROGNAME" "$*" >&2
}
#######################################
# Print the help text and terminate the script.
# Globals:   PROGNAME (read)
# Arguments: $1 - exit status (default 2)
# Outputs:   help text on stdout when the status is 0 (help was requested),
#            on stderr otherwise (help shown because of a usage error)
# Returns:   never returns; exits with the given status
#######################################
usage() {
  local status="${1:-2}"
  local fd=2

  if [[ "$status" == "0" ]]; then
    fd=1
  fi

  # Unquoted delimiter: $PROGNAME is expanded inside the here-document.
  cat >&"$fd" <<EOF
This script replaces the duplicate files with hard links.

Usage:
  $PROGNAME [options] DIRECTORY...

OPTIONS:
  -r        for every directory given follow subdirectories
            encountered within
  -R dir    for each directory given after this option follow
            subdirectories encountered within
  -s        follow symlinks
  -H        remap two and more hard links to one inode
  -n        exclude zero-length files from consideration
  -A        exclude hidden files from consideration
  -o BY     select sort order for output and deleting; by file
            modification time (BY='time'; default), status
            change time (BY='ctime'), or filename (BY='name')
  -i        reverse order while sorting
  -U        show the total size before and after deduplication
  -v        print the name of each file as it is linked
  -V        print $PROGNAME version and exit
  -h        show this help

KNOWN ISSUES:
  * unable to deduplicate files that contain a newline in a filename
EOF

  exit "$status"
}
#######################################
# Report the combined, human-readable disk usage of the given paths.
# Arguments: one or more files/directories
# Outputs:   a single line on stdout: the size column of du's grand total
#######################################
get_total_size() {
  # 'du -c' prints the grand total as its LAST line. Taking that line rather
  # than matching a second column equal to "total" stays correct when an
  # operand is itself named "total" or when du labels the line differently.
  # '--' keeps operands that start with '-' from being parsed as options.
  du -csh -- "$@" | awk '{ size = $1 } END { print size }'
}
#######################################
# Replace every duplicate reported by fdupes with a hard link to the first
# file of its duplicate set.
# Globals:   FDUPES_OPTS (read) - extra options for fdupes
#            LN_OPTS     (read) - extra options for ln
# Arguments: directories to scan (passed to fdupes after '--')
# Outputs:   whatever fdupes/ln print (e.g. ln -v output, error messages)
#######################################
fdedupes() {
  local keeper dup_file
  local -a dup_files=()

  # With --sameline, fdupes lists each duplicate set on one line. The paths
  # are separated by spaces, and spaces inside a path are backslash-escaped.
  # 'read' is deliberately used WITHOUT -r so those escapes are undone while
  # splitting. IFS is restricted to a space so a tab inside a file name does
  # not split the path.
  # shellcheck disable=SC2162
  fdupes --sameline "${FDUPES_OPTS[@]}" -- "$@" |
    while IFS=' ' read -a dup_files; do
      keeper="${dup_files[0]}"
      [[ -e "$keeper" ]] || continue

      for dup_file in "${dup_files[@]:1}"; do
        [[ -e "$dup_file" ]] || continue

        # Already the same inode (possible with fdupes -H): nothing to do,
        # and 'ln -f' would only complain that both are the same file.
        if [[ "$keeper" -ef "$dup_file" ]]; then
          continue
        fi

        # A failed link must not abort the whole run under errexit.
        ln -f "${LN_OPTS[@]}" -- "$keeper" "$dup_file" || continue
      done
    done
}
# ---- Option parsing -------------------------------------------------------
# The leading ':' selects getopts' silent mode: a missing argument is
# reported as ':' and an unknown option as '?', both with the offending
# letter in OPTARG.
while getopts ":AiHnrR:sUo:vVh" opt; do
  case "$opt" in
    # Flags forwarded unchanged to fdupes.
    A | i | H | n | s | r)
      FDUPES_OPTS+=("-$opt")
      ;;
    # fdupes options that carry an argument.
    R | o)
      FDUPES_OPTS+=("-$opt" "$OPTARG")
      ;;
    U)
      SHOW_DISK_USAGE=1
      ;;
    # Forwarded to ln.
    v)
      LN_OPTS+=("-$opt")
      ;;
    V)
      printf "%s %s\n" "$PROGNAME" "$VERSION"
      exit 0
      ;;
    h)
      usage 0
      ;;
    :)
      err "option requires an argument -- '-$OPTARG'"
      usage
      ;;
    \?)
      err "illegal option -- '-$OPTARG'"
      usage
      ;;
  esac
done
shift $((OPTIND - 1))

# ---- Main -----------------------------------------------------------------
# Return an error if positional parameters are not found
if [[ -z "${1:-}" ]]; then
  err "no directories specified"
  usage
fi

if [[ "$SHOW_DISK_USAGE" -eq 1 ]]; then
  total_size_before="$(get_total_size "$@")"
  fdedupes "$@"
  total_size_after="$(get_total_size "$@")"

  printf 'Disk usage has been reduced from %s to %s.\n' \
    "$total_size_before" "$total_size_after"
else
  fdedupes "$@"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment