#!/bin/bash
# Author : [email protected]
# Copyright (c) Sean Maddison
#
# Notes
# -----
# - My first attempt at writing a bash script!
# - Set deletefiles to true below to remove duplicates when found; set it to false to only log them
# - The default image duplicate threshold is 90%; change dupeThreshold below to alter this
# - An example invocation is shown after the script
#
# TODO
# * Parameterise the "keep the largest file" behaviour
# * Write output to LOG_FILE

filename=image_dupes_found.txt
deletefiles=false
LOG_FILE=/var/log/image-dupes.sh.log
dupeThreshold=90%
echo -e "! deletefiles is set to $deletefiles" | |
if [ -z "$1" ] | |
then | |
echo " - no working directory provided, defaulting to current directory" | |
workingDir=./ | |
else | |
echo " - working directory provided" | |
workingDir=$1; | |
fi | |
echo "working directory is $workingDir" | |
# write duplicates to file | |
workingFile=$workingDir/$filename | |
# Write findimagedupes output to a file so we can work through it | |
echo Writing findimagedupes output to $workingFile | |
findimagedupes --threshold=$dupeThreshold $workingDir > $workingFile | |
# Loop through each line of the output file; each line is one group of duplicate images
fileLines=$(cat "$workingFile")
lineCount=1
dupesFound=0
deletedFiles=0

# Make newlines, not spaces, the separator for the outer loop
IFS=$'\n'
for line in $fileLines
do
  prevSize=0
  echo "line $lineCount is $line"
  lineCount=$((lineCount+1))

  # Split the line on whitespace, check each file size and select the largest file to keep
  # (note: filenames containing spaces are not handled)
  IFS=' ' read -ra thisDuplicate <<< "$line"
  for thisEntry in "${thisDuplicate[@]}"
  do
    dupesFound=$((dupesFound+1))
    thisSize=$(stat --printf="%s" "$thisEntry")
    echo " - single found item is $thisEntry with file size of $thisSize bytes"
    # Track the largest file seen so far
    if [[ "$thisSize" -gt "$prevSize" ]]; then
      fileToKeep=$thisEntry
      prevSize=$thisSize
    fi
  done
  echo " - largest file is $fileToKeep, this file will be kept when deleting duplicates"
  # Don't count the kept file as a duplicate
  dupesFound=$((dupesFound-1))
  echo ---

  # Now delete (or just report) every file in the group except the one we keep
  IFS=' ' read -ra thisDuplicate <<< "$line"
  for thisEntry in "${thisDuplicate[@]}"
  do
    if [[ "$thisEntry" != "$fileToKeep" ]]; then
      echo " - file to be deleted is $thisEntry"
      # Only delete when deletefiles is set to true at the top of the script
      if [[ "$deletefiles" == "true" ]]; then
        echo -e " - deleting duplicate"
        rm -v "$thisEntry"
        deletedFiles=$((deletedFiles+1))
      fi
    fi
  done
  echo -----
done

echo -e " - $dupesFound duplicates found, not including largest original file"
echo -e " - $deletedFiles files deleted."