Last active
January 9, 2024 21:57
-
-
Save rlskoeser/7f376cca3b47bb774df46f71942e199f to your computer and use it in GitHub Desktop.
Shell script to count the number of transcription and translation files, and the number of distinct documents (PGPIDs) they cover, in the pgp-text git repo over time
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# based on https://blog.benoitblanchon.fr/git-file-count-vs-time/

# Path of the CSV file the script writes; it is truncated and recreated here.
OUTPUT=stats.csv

# create output file with a CSV header
# (earlier semicolon-delimited header, kept for reference)
# echo "date;transcription_count;transcribed_documents;translation_count;translated_documents" > $OUTPUT
echo "date,transcriptions,transcribed_documents,translations,translated_documents" > "$OUTPUT"
#######################################
# Count the paths in the tree of $COMMIT that match a regex.
# Globals:   COMMIT (read) - commit-ish whose tree is listed recursively
# Arguments: $1 - grep basic regular expression tested against each path
# Outputs:   the number of matching paths (digits only) on stdout
# Returns:   0 when the count was produced (including a count of 0),
#            grep's error status otherwise
#######################################
count() {
  # Both expansions are quoted so a regex containing spaces or glob
  # characters reaches grep as one unmodified argument.
  # grep -c prints 0 but exits 1 when nothing matches; that is a valid
  # result here, so only statuses other than 1 are reported as failure.
  git ls-tree -r --name-only "$COMMIT" | grep -c -e "$1" || [ "$?" -eq 1 ]
}
#######################################
# Count the distinct PGPIDs among the paths in the tree of $COMMIT that
# match a regex.
# Globals:   COMMIT (read) - commit-ish whose tree is listed recursively
# Arguments: $1 - grep basic regular expression tested against each path
# Outputs:   the number of distinct identifiers (digits only) on stdout
#
# Each matching path is reduced to its last "PGPID<digits>" that is followed
# by an underscore. A matching path with no such segment is left unchanged
# by sed and is counted as its own entry.
#######################################
count_documents() {
  # sort -u rather than bare uniq: uniq only collapses *adjacent* duplicates,
  # so the same PGPID appearing in two directories would be counted twice.
  # LC_ALL=C keeps the de-duplication byte-wise and locale-independent.
  # tr strips the padding some wc implementations put before the number.
  git ls-tree -r --name-only "$COMMIT" \
    | grep -e "$1" \
    | sed -E -e 's/^.*(PGPID[0-9]+)_.*$/\1/' \
    | LC_ALL=C sort -u \
    | wc -l \
    | tr -d ' '
}
# Regexes handed to count / count_documents.
# NOTE(review): the '.' before "txt" is unescaped, so it matches any single
# character; kept as-is to preserve the existing counts.
XSCRIPT_PATTERN=".*/*_transcription.txt$"
TRANSLATION_PATTERN=".*/*_translation.txt$"

# for each commit in log: one "<hash> <YYYY-MM-DD committer date>" per line.
# COMMIT is deliberately not local: count and count_documents read it.
git log --pretty="%H %cd" --date=short | while read -r COMMIT DATE; do
  # skip commits made on the same day: only the first commit that git log
  # emits for a given date is measured
  [[ "${PREV_DATE-}" == "$DATE" ]] && continue
  PREV_DATE="$DATE"

  # count files
  XSCRIPT_FILES=$(count "$XSCRIPT_PATTERN")
  TRANSLATION_FILES=$(count "$TRANSLATION_PATTERN")

  # count documents
  XSCRIPT_DOCS=$(count_documents "$XSCRIPT_PATTERN")
  TRANSLATION_DOCS=$(count_documents "$TRANSLATION_PATTERN")

  # print to console
  echo "$DATE"
  echo " $XSCRIPT_FILES transcription.txt files"
  echo " $XSCRIPT_DOCS transcription.txt PGPIDs"
  echo " $TRANSLATION_FILES translation.txt files"
  echo " $TRANSLATION_DOCS translation.txt PGPIDs"

  # append to CSV file (column order matches the header written to $OUTPUT)
  # echo "$DATE;$XSCRIPT_FILES;$TRANSLATION_FILES" >> $OUTPUT
  printf '%s,%s,%s,%s,%s\n' \
    "$DATE" "$XSCRIPT_FILES" "$XSCRIPT_DOCS" \
    "$TRANSLATION_FILES" "$TRANSLATION_DOCS" >> "$OUTPUT"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment