Last active
September 6, 2016 14:43
-
-
Save FabreFrederic/241125924e6ea5648c855d253bcf584e to your computer and use it in GitHub Desktop.
Get the X largest files from your repository
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
#
# Get the X largest files from your repository.
#
# Usage: run from inside a git repository (bare or not) with exactly one
# argument: the number of files to report (a positive integer).
#
# Results are stored in the folder $folder (recreated on every run):
#   foundList.txt    largest files that still exist in the HEAD commit
#   deletedList.txt  largest files that no longer exist in the HEAD commit;
#                    they cannot be found in your workspace
# Each line of both lists is "<blob SHA1> <size in bytes> <path>", sorted by
# decreasing size. Intermediate files are kept in $folder/temp.
#
# Only blobs stored whole (non-delta) in a pack are considered, because
# those are the lines of `git verify-pack -v` that carry exactly five fields.
#

set -u

# Folder to store the result and the temp files
folder="./cleanedFiles"

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Validate the command line before anything is printed or deleted.
if [ "$#" -ne 1 ]; then
  die "Error : only one argument is needed : the files number"
fi

# The X first files to find
filesNumber=$1

case "$filesNumber" in
  '')
    die "Files number argument not supplied."
    ;;
  *[!0-9]*)
    die "Error : the files number must be a positive integer"
    ;;
esac
if ! [ "$filesNumber" -gt 0 ] 2>/dev/null; then
  die "Error : the files number must be a positive integer"
fi

# Is it a bare repository ? Ask git instead of probing for a ".git" entry,
# so this also works from a sub-directory and fails cleanly outside a repo.
bareRepository=$(git rev-parse --is-bare-repository 2>/dev/null) \
  || die "Error : not inside a git repository"

# Directory holding the pack files; --git-path resolves it correctly for
# bare repositories, linked worktrees and submodules alike.
packDir=$(git rev-parse --git-path objects/pack) \
  || die "Error : cannot locate the pack directory"

echo ""
echo "You're workin' on a git bare repository : $bareRepository"
echo "Search for the first $filesNumber largest files in the repository history, sorted by size"
echo ""

# Start from a clean result folder (":?" guards against an empty path).
rm -Rf -- "${folder:?}"
mkdir -p -- "$folder/temp" || die "Error : cannot create the folder $folder"

echo ""
echo "The folder $folder is created"

# List of SHA1 of all the files in the repo (available in the workspace and history)
git rev-list --objects --all | sort -k 2 > "$folder/temp/allFilesSHA1.txt"

echo ""
echo "Git garbage collector :"
# Packing everything first makes the pack indexes cover the whole history.
git gc || die "Error : git gc failed"

# Collect the pack indexes; an unmatched glob stays literal, hence the -e test.
set -- "$packDir"/pack-*.idx
if [ ! -e "$1" ]; then
  die "Error : no pack index found in $packDir"
fi

# First largest objects list: keep the 5-field blob lines
# (SHA1 type size size-in-pack offset), sorted by increasing size.
git verify-pack -v "$@" \
  | awk 'NF == 5 && $2 == "blob"' \
  | sort -k 3,3n > "$folder/temp/allBigObjects.txt"

tail -n "$filesNumber" "$folder/temp/allBigObjects.txt" > "$folder/temp/bigObjects.txt"

echo ""
echo "$filesNumber first largest objects list created"

# Largest files list: join the big blobs with their path in a single pass.
# The whole remainder of the rev-list line is kept as the path so that names
# containing spaces survive. Blobs without any path (not reachable from a
# ref) are left out instead of being reported with an empty name.
awk '
  FILENAME == ARGV[1] { size[$1] = $3; next }
  ($1 in size) && NF > 1 {
    path = $0
    sub(/^[^ ]+ /, "", path)
    print $1, size[$1], path
  }
' "$folder/temp/bigObjects.txt" "$folder/temp/allFilesSHA1.txt" \
  > "$folder/temp/largeFiles.txt"

echo ""
echo "Largest files list created"

# Largest files sorted list
sort -k 2,2nr "$folder/temp/largeFiles.txt" > "$folder/temp/largeFiles.sorted.txt"

echo ""
echo "Largest files sorted list created"

# Always create both result files so the final hint is true even when one
# of the two categories is empty.
: > "$folder/foundList.txt"
: > "$folder/deletedList.txt"

while IFS= read -r line; do
  # A line is "<SHA1> <size> <path>": strip the first two fields.
  path=${line#* }
  path=${path#* }
  if git cat-file -e "HEAD:$path" < /dev/null > /dev/null 2>&1; then
    # File can be found in the last commit
    printf '%s\n' "$line" >> "$folder/foundList.txt"
  else
    # File doesn't exist anymore in the last commit
    printf '%s\n' "$line" >> "$folder/deletedList.txt"
  fi
done < "$folder/temp/largeFiles.sorted.txt"

echo "Open the foundList.txt and the deletedList.txt files in the $folder folder"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment