Created
November 26, 2021 01:35
-
-
Save flakey-bit/ee3ba052ee3e86cb4a0de32a84bedc78 to your computer and use it in GitHub Desktop.
Using bash to remove duplicate files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Find and remove duplicate (byte-identical, non-empty) files under a directory.
#
# Usage:
#   bash <this-file> DIR        # delete duplicates under DIR, keep one per group
#   source <this-file>          # then call the functions below interactively:
#     list_duplicates [DIR]     #   preview every file that has a duplicate
#     list_keepers [DIR]        #   preview the one file kept per group
#     remove_duplicates [DIR]   #   delete everything except the keepers
#
# Requires GNU find/coreutils (md5sum -z, sort -z, uniq -z -w, cut -z).
# Records are NUL-delimited end to end so that file names containing spaces,
# quotes, backslashes or newlines are handled correctly.

# Print one NUL-terminated "HASH  PATH" record per non-empty regular file
# under DIR (default "."), sorted byte-wise so that files with equal content
# are adjacent and ordered by path.
_hash_files() {
  local dir=${1:-.}
  find "$dir" ! -empty -type f -exec md5sum -z -- {} + | LC_ALL=C sort -z
}

# Get all duplicate files, sorted by hash (one "HASH  PATH" line per file).
list_duplicates() {
  _hash_files "${1:-.}" | uniq -z -w32 -D | tr '\0' '\n'
}

# For each group of duplicated files, print the path of the file to keep
# (the first path of the group in byte-wise sort order).
list_keepers() {
  # The path starts at column 35: 32 hash characters plus a 2-character separator.
  _hash_files "${1:-.}" | uniq -z -w32 -d | cut -z -c 35- | tr '\0' '\n'
}

# Remove the duplicate files in place: within each group of identical files
# the first path is kept and every later one is deleted. Files are never
# copied elsewhere, so the directory layout of the keepers is preserved.
# Returns non-zero if any deletion failed.
remove_duplicates() {
  local dir=${1:-.}
  local record hash path
  local prev_hash=''
  local status=0

  while IFS= read -r -d '' record; do
    hash=${record:0:32}
    path=${record:34}
    if [[ "$hash" == "$prev_hash" ]]; then
      # Same hash as the previous record: this is a redundant copy.
      rm -- "$path" || status=1
    else
      # First file seen with this hash: it is the keeper for its group.
      prev_hash=$hash
    fi
  done < <(_hash_files "$dir")

  return "$status"
}

# Only act when executed directly, and only delete when a directory is named
# explicitly, so the destructive step never runs by accident.
if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
  if (( $# > 0 )); then
    remove_duplicates "$1"
  else
    printf 'usage: %s DIR\n' "${0##*/}" >&2
  fi
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment