Last active
August 18, 2022 18:30
-
-
Save ddre54/02dfdf30fda279f23e6c496775e3ba38 to your computer and use it in GitHub Desktop.
Download all the logs from Papertrail and process specific logs based on a grep expression
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download all the logs from Papertrail and process specific logs based on a grep expression
# - 1 Download the logs
#
# The API token is taken from the PAPERTRAIL_API_TOKEN environment variable;
# if it is unset or empty, the placeholder below is used (replace it with
# your HTTP API key).
# NOTE(review): the token is passed on curl's command line, so it may be
# visible to other local users via the process list.
PAPERTRAIL_API_TOKEN=${PAPERTRAIL_API_TOKEN:-YOUR-HTTP-API-KEY}

# Pipeline stages:
#   1. fetch the archive index (JSON); -f/-S make HTTP and transfer errors
#      visible instead of failing silently
#   2. keep each "filename":"..." entry, then the digit-and-dash run inside it
#   3. emit a curl config (one url/output pair per archive)
#   4. a second curl reads that config from stdin (-K-) and downloads each
#      archive to <id>.tsv.gz in the current directory
curl -fsS -H "X-Papertrail-Token: ${PAPERTRAIL_API_TOKEN}" https://papertrailapp.com/api/v1/archives.json |
  grep -o '"filename":"[^"]*"' | grep -Eo '[0-9-]{3,}' |
  awk '{
    print "url = https://papertrailapp.com/api/v1/archives/" $0 "/download"
    print "output = " $0 ".tsv.gz"
  }' | curl --globoff --progress-bar -fL -H "X-Papertrail-Token: ${PAPERTRAIL_API_TOKEN}" -K-
# - 2 Iterates through compressed files in directory
# - 3 Uncompress them
# - 4 Grep specific expression into an out directory with the base file name
# - 5 Removes uncompressed file (to recover disk space)
# - 6 Logs that file has been processed
#
# Works on every *.gz file in the current directory. Each archive is expected
# to be named <base>.tsv.gz so that gunzip leaves <base>.tsv behind. Lines
# matching the regex "access.log" are written to access_logs/<base>.tsv.
# gunzip replaces the archive with the uncompressed file, which is then
# deleted, so neither remains afterwards.
extract_access_logs() {
  local f name

  # The output directory must exist before the redirect below can succeed.
  mkdir -p access_logs || return

  for f in *.gz; do
    # With no matching archive the glob stays literal; skip it.
    [ -e "$f" ] || continue

    # Strip everything from the first dot: 2022-08-18.tsv.gz -> 2022-08-18
    name=${f%%.*}

    if ! gunzip -- "$f"; then
      printf 'skipping %s: gunzip failed\n' "$f" >&2
      continue
    fi

    # grep exits non-zero when nothing matches; the (empty) output file is
    # still created, so the status is deliberately not checked.
    grep -- access.log "${name}.tsv" > "access_logs/${name}.tsv"
    rm -- "${name}.tsv"
    echo "--${f}-Done-"
  done
}
extract_access_logs
# - 7 Extract unique domains from the access logs
#
# For every *.tsv file in the current directory, takes the 12th
# whitespace-separated field of each line, sorts and de-duplicates the values,
# and writes them to <base>_domains.tsv next to the input.
extract_unique_domains() {
  local f name

  for f in *.tsv; do
    # With no matching file the glob stays literal; skip it.
    [ -e "$f" ] || continue

    # Outputs land in the same directory and also match *.tsv; skip them so
    # a second run does not produce <base>_domains_domains.tsv.
    case $f in
      *_domains.tsv) continue ;;
    esac

    # Strip everything from the first dot: 2022-08-18.tsv -> 2022-08-18
    name=${f%%.*}

    # The stages must be piped together: separated by ';' the awk output goes
    # to the terminal and sort waits on stdin instead of reading it.
    awk '{print $12}' "$f" | sort -u > "${name}_domains.tsv"
    echo "--${f}-Done-"
  done
}
extract_unique_domains
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment