OndraZizka · May 17, 2016 00:56
diff --git a/find-duplicate-files.bash b/find-duplicate-files.bash
 # Record a partial MD5 (hash of the first 1 MiB only) for every regular file
 # larger than 3 MiB below the current directory, appending "<md5> <path>"
 # lines to md5-partial.txt in the current directory.
 #
 # Progress on stdout: '.' for every file examined, ':' when the file's path is
 # already present in the index and the file is skipped. When a newly hashed
 # file shares its partial MD5 with indexed entries, those entries are printed,
 # followed by "Duplicate: <path>".
 #
 # Requires GNU dd (status=none) and md5sum.
 index_partial_md5s() {
   local index_file=md5-partial.txt
   local file md5

   # The index itself is excluded so it is never hashed as data once it grows.
   find . -type f -size +3M ! -path "./${index_file}" -print0 |
     while IFS= read -r -d '' file; do
       printf '.'
       # -F: the path is a literal string, not a regex (names may hold [ . *).
       # -s: a missing index on the very first run is expected, not an error.
       # NOTE: this remains a substring match against whole index lines.
       if grep -qsF -- "$file" "$index_file"; then
         printf ':'
         continue
       fi
       # Only the first 1 MiB is hashed: a match is a candidate, not a proof.
       md5=$(dd bs=1M count=1 if="$file" status=none | md5sum)
       md5=${md5%% *}
       # Deliberately not -q: the earlier entries with the same hash are shown.
       if grep -sF -- "$md5" "$index_file"; then
         printf 'Duplicate: %s\n' "$file"
       fi
       # printf keeps the path byte-for-byte (no word splitting or globbing).
       printf '%s %s\n' "$md5" "$file" >> "$index_file"
     done
 }
 index_partial_md5s
 fi

 ## Show the duplicates
 #sort md5-partial.txt | uniq  --check-chars=32 -d -c
 #sort md5-partial.txt | uniq  --check-chars=32 -d -c | sort -b -n
 #sort md5-partial.txt | uniq  --check-chars=32 -d -c | sort -b -n | cut -c 9-40 | xargs -I '{}' sh -c "grep '{}'  md5-partial.txt && echo"

 ## Show wasted space
 if [ false ] ; then
 # Print the size in bytes of one representative file for every partial MD5
 # that occurs more than once in md5-partial.txt (lines of the form
 # "<32-char-md5> <path>").
 #
 # Requires GNU uniq (--check-chars) and GNU stat (-c).
 report_duplicate_sizes() {
   local line entry file_path

   sort md5-partial.txt | uniq --check-chars=32 -d -c |
     while IFS= read -r line; do
       # uniq -c prepends a space-padded count followed by a single space.
       entry=${line#"${line%%[! ]*}"}  # drop the left padding
       entry=${entry#* }               # drop the count and its separator
       # The path starts after the 32-character hash and one space.
       file_path=${entry:33}
       stat -c '%s' -- "$file_path"
     done
 }
 report_duplicate_sizes
	# Record a partial MD5 (hash of the first 1 MiB only) for every regular file
	# larger than 3 MiB below the current directory, appending "<md5> <path>"
	# lines to md5-partial.txt in the current directory.
	#
	# Progress on stdout: '.' for every file examined, ':' when the file's path is
	# already present in the index and the file is skipped. When a newly hashed
	# file shares its partial MD5 with indexed entries, those entries are printed,
	# followed by "Duplicate: <path>".
	#
	# Requires GNU dd (status=none) and md5sum.
	index_partial_md5s() {
	  local index_file=md5-partial.txt
	  local file md5

	  # The index itself is excluded so it is never hashed as data once it grows.
	  find . -type f -size +3M ! -path "./${index_file}" -print0 |
	    while IFS= read -r -d '' file; do
	      printf '.'
	      # -F: the path is a literal string, not a regex (names may hold [ . *).
	      # -s: a missing index on the very first run is expected, not an error.
	      # NOTE: this remains a substring match against whole index lines.
	      if grep -qsF -- "$file" "$index_file"; then
	        printf ':'
	        continue
	      fi
	      # Only the first 1 MiB is hashed: a match is a candidate, not a proof.
	      md5=$(dd bs=1M count=1 if="$file" status=none | md5sum)
	      md5=${md5%% *}
	      # Deliberately not -q: the earlier entries with the same hash are shown.
	      if grep -sF -- "$md5" "$index_file"; then
	        printf 'Duplicate: %s\n' "$file"
	      fi
	      # printf keeps the path byte-for-byte (no word splitting or globbing).
	      printf '%s %s\n' "$md5" "$file" >> "$index_file"
	    done
	}
	index_partial_md5s
	fi

	## Show the duplicates
	#sort md5-partial.txt | uniq --check-chars=32 -d -c
	#sort md5-partial.txt | uniq --check-chars=32 -d -c | sort -b -n
	#sort md5-partial.txt | uniq --check-chars=32 -d -c | sort -b -n | cut -c 9-40 | xargs -I '{}' sh -c "grep '{}' md5-partial.txt && echo"

	## Show wasted space
	if [ false ] ; then
	# Print the size in bytes of one representative file for every partial MD5
	# that occurs more than once in md5-partial.txt (lines of the form
	# "<32-char-md5> <path>").
	#
	# Requires GNU uniq (--check-chars) and GNU stat (-c).
	report_duplicate_sizes() {
	  local line entry file_path

	  sort md5-partial.txt | uniq --check-chars=32 -d -c |
	    while IFS= read -r line; do
	      # uniq -c prepends a space-padded count followed by a single space.
	      entry=${line#"${line%%[! ]*}"}  # drop the left padding
	      entry=${entry#* }               # drop the count and its separator
	      # The path starts after the 32-character hash and one space.
	      file_path=${entry:33}
	      stat -c '%s' -- "$file_path"
	    done
	}
	report_duplicate_sizes
No results found