#!/bin/bash
# Author : [email protected]
# Copyright (c) Sean Maddison
#
# Notes
# -----
# - My first attempt at writing a bash script!
# - Set deletefiles to true below to remove duplicates when found; set it to false to only log them
# - The default image duplicate threshold is 90%; change dupeThreshold below to alter this
# - An example invocation is shown after the script
#
# TODO
# * Parameterise the "keep the largest file" behaviour
# * Write output to LOG_FILE

filename=image_dupes_found.txt
deletefiles=false
LOG_FILE=/var/log/image-dupes.sh.log
dupeThreshold=90%
echo -e "! deletefiles is set to $deletefiles" | |
if [ -z "$1" ] | |
then | |
echo " - no working directory provided, defaulting to current directory" | |
workingDir=./ | |
else | |
echo " - working directory provided" | |
workingDir=$1; | |
fi | |
echo "working directory is $workingDir" | |
# write duplicates to file | |
workingFile=$workingDir/$filename | |
# Write findimagedupes output to a file so we can work through it | |
echo Writing findimagedupes output to $workingFile | |
findimagedupes --threshold=$dupeThreshold $workingDir > $workingFile | |
# Loop through each line of the output file; each line is one group of duplicate images
fileLines=$(cat "$workingFile")
lineCount=1
dupesFound=0
deletedFiles=0

# Make newlines, not spaces, the separator for the outer loop
IFS=$'\n'
for line in $fileLines
do
  prevSize=0
  echo "line $lineCount is $line"
  lineCount=$((lineCount+1))

  # Split the line on whitespace, check each file size and select the largest file to keep
  # (note: filenames containing spaces are not handled)
  IFS=' ' read -ra thisDuplicate <<< "$line"
  for thisEntry in "${thisDuplicate[@]}"
  do
    dupesFound=$((dupesFound+1))
    thisSize=$(stat --printf="%s" "$thisEntry")
    echo " - single found item is $thisEntry with file size of $thisSize bytes"
    # Track the largest file seen so far
    if [[ "$thisSize" -gt "$prevSize" ]]; then
      fileToKeep=$thisEntry
      prevSize=$thisSize
    fi
  done
  echo " - largest file is $fileToKeep, this file will be kept when deleting duplicates"
  # Don't count the kept file as a duplicate
  dupesFound=$((dupesFound-1))
  echo ---

  # Now delete (or just report) every file in the group except the one we keep
  IFS=' ' read -ra thisDuplicate <<< "$line"
  for thisEntry in "${thisDuplicate[@]}"
  do
    if [[ "$thisEntry" != "$fileToKeep" ]]; then
      echo " - file to be deleted is $thisEntry"
      # Only delete when deletefiles is set to true at the top of the script
      if [[ "$deletefiles" == "true" ]]; then
        echo -e " - deleting duplicate"
        rm -v "$thisEntry"
        deletedFiles=$((deletedFiles+1))
      fi
    fi
  done
  echo -----
done

echo -e " - $dupesFound duplicates found, not including largest original file"
echo -e " - $deletedFiles files deleted."