Last active
February 7, 2018 04:40
-
-
Save tonypiazza/0c85d10447b68688015aa1133e7eafb0 to your computer and use it in GitHub Desktop.
Bash script to download flight data from the U.S. Department of Transportation, decompress it, and then upload it to S3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Create a private scratch directory and make it the working directory, so
# that every downloaded and decompressed file lands somewhere disposable.
# Sets the global temp_dir to the path of that directory.
echo 'Creating a temporary directory...'
temp_dir=$(mktemp -d -q)
if [[ -z "$temp_dir" || ! -d "$temp_dir" ]]; then
  echo 'Could not create temp dir!' >&2
  exit 1
fi
# Abort if the directory cannot be entered: the upload step copies every
# *.csv found in the current directory, so it must never run from the
# caller's own working directory.
if ! pushd "$temp_dir" > /dev/null; then
  echo 'Could not enter temp dir!' >&2
  # The EXIT trap is not registered yet, so remove the directory here.
  rmdir -- "$temp_dir"
  exit 1
fi
#######################################
# Exit handler: return to the directory the script started in and delete
# the scratch directory.
# Globals:   temp_dir (read) - path of the scratch directory
# Arguments: none
# Returns:   0; a failing popd is tolerated so cleanup never aborts midway
#######################################
cleanup() {
  # popd fails when the directory stack is empty (for example when cleanup
  # runs twice); that is harmless here, so the error is deliberately muted.
  popd > /dev/null 2>&1 || true
  # Only delete when temp_dir names an existing directory, so an empty or
  # unset variable can never turn this into a stray 'rm -rf'.
  if [[ -n "${temp_dir:-}" && -d "$temp_dir" ]]; then
    rm -rf -- "$temp_dir"
  fi
}
# Run cleanup on every exit path, including the error exits.
trap cleanup EXIT
# Ask for the destination bucket and confirm that it appears in the bucket
# list returned for the configured AWS credentials before any data is
# downloaded. Sets the global bucket; exits with status 2 when the name is
# unusable or cannot be verified.
echo 'Name of S3 destination bucket:'
# read -r keeps backslashes literal. The name is spliced into a quoted
# JMESPath literal below, so anything outside the characters that bucket
# names are built from (which excludes quotes and backslashes) is rejected.
if ! read -r bucket || [[ ! "$bucket" =~ ^[A-Za-z0-9._-]+$ ]]; then
  echo 'Invalid bucket specified!' >&2
  exit 2
fi
# A failing CLI call (missing credentials, no network, aws not installed)
# must stop the script instead of leaving an empty result to be compared.
if ! result=$(aws s3api list-buckets --output json \
  --query "Buckets[?Name == '$bucket'] | length(@)"); then
  echo 'Could not verify bucket!' >&2
  exit 2
fi
# The query yields the number of buckets with that name; anything other
# than a positive integer means the bucket was not found.
if [[ ! "$result" =~ ^[1-9][0-9]*$ ]]; then
  echo 'Invalid bucket specified!' >&2
  exit 2
fi
# Fetch the yearly flight archives (1987-2008) and the three lookup tables
# into the current directory. Exits with status 3 if any transfer fails.
echo 'Downloading files...'
base_url='http://stat-computing.org/dataexpo/2009'
# --fail        turn HTTP error responses into a non-zero exit instead of
#               saving the error page as if it were data
# --fail-early  stop at the first failed URL of the multi-URL range request
#               rather than reporting only the last transfer's status
# --location    follow redirects
curl_opts=(--fail --fail-early --location)

# The URL is quoted so that [1987-2008] reaches curl as its own range syntax
# instead of being treated as a shell glob; curl substitutes the current
# range value for '#1' in the output file name.
if ! curl "${curl_opts[@]}" "${base_url}/[1987-2008].csv.bz2" \
  -o 'flights-#1.csv.bz2'; then
  echo 'Download of flight data failed!' >&2
  exit 3
fi

# Each entry is <remote name>:<local name>.
for mapping in airports.csv:airports.csv carriers.csv:carriers.csv \
  plane-data.csv:planes.csv; do
  remote_name=${mapping%%:*}
  local_name=${mapping##*:}
  if ! curl "${curl_opts[@]}" "${base_url}/${remote_name}" -o "$local_name"; then
    echo "Download of ${remote_name} failed!" >&2
    exit 3
  fi
done
# Expand the yearly flight archives in the current directory. Exits with
# status 4 on failure so that incomplete data is never uploaded.
echo 'Decompressing files...'
# '--' ends option parsing so the glob results are always treated as files.
if ! bunzip2 -- flights-????.csv.bz2; then
  echo 'Decompression failed!' >&2
  exit 4
fi
# Copy every CSV file in the current directory to s3://<bucket>/csv/.
# Reads the global bucket. Exits with status 5 as soon as one upload fails,
# and with status 0 once all uploads have succeeded.
echo 'Uploading files to S3 bucket...'
for filename in *.csv; do
  # When nothing matches, the glob stays as the literal '*.csv'; skip it
  # rather than asking the CLI to upload a file that does not exist.
  [[ -e "$filename" ]] || continue
  if ! aws s3 cp "$filename" "s3://${bucket}/csv/${filename}"; then
    echo "Upload of ${filename} failed!" >&2
    exit 5
  fi
done
echo 'Processing complete.'
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script requires the AWS CLI to be configured with appropriate permissions to access the S3 bucket; it also needs curl and bunzip2 to be installed.