Created April 17, 2011 23:23
-
-
Save onyxfish/924589 to your computer and use it in GitHub Desktop.
Full script for the csvkit tutorial (proof of process repeatability)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Full script for the csvkit tutorial (proof of process repeatability).
# This first section creates a workspace, downloads the two CSV files,
# backs them up, and trims the header of the second file.

###################
# Getting started #
###################

# Setup a workspace
# -p lets the script be re-run when the directory already exists.
mkdir -p va_benefits
# Abort if the workspace cannot be entered, so nothing below writes elsewhere.
cd va_benefits || exit 1

# This won't work
# Same request as the one further down, but without the -U (User-Agent)
# option; the result is printed so the difference can be inspected.
wget -O 2009.csv http://www.data.gov/download/4029/csv
cat 2009.csv

# Get the files
# Browser-style User-Agent string shared by both downloads.
user_agent="Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.205 Safari/534.16"

wget -O 2009.csv -U "$user_agent" http://www.data.gov/download/4029/csv
head -n 5 2009.csv

wget -O 2010.csv -U "$user_agent" http://www.data.gov/download/4509/csv
head -n 5 2010.csv

# Backup files
cp 2009.csv 2009_original.csv
cp 2010.csv 2010_original.csv

# Fix the header of the second file
# Drop the first two lines of the backup and overwrite 2010.csv with the rest.
sed "1,2d" 2010_original.csv > 2010.csv
######################
# Examining the data #
######################
# All commands below read 2009.csv from the current working directory.

# Slicing columns
# -n lists the column names; -c selects columns by position or name.
csvcut -n 2009.csv
csvcut -c 2,3 2009.csv | head -n 5

# Computing statistics
# NOTE(review): later csvkit releases appear to ship this tool as `csvstat`;
# confirm which name the installed version provides.
csvcut -c 1,4,9,10 2009.csv | csvsummary

# Searching for Illinois
# First by a literal value in column 1, then by a regex (-r) for rows whose
# first column starts with "I".
csvcut -c 1,"TOTAL" 2009.csv | csvgrep -c 1 ILLINOIS
csvcut -c 1,"TOTAL" 2009.csv | csvgrep -c 1 -r "^I"

# Building up a command to sort by total
# Each step adds one stage: select columns 9 and 1, sort in reverse (-r),
# then add line numbers (-l).
csvcut -c 9,1 2009.csv | head -n 5
csvcut -c 9,1 2009.csv | csvsort -r | head -n 5
csvcut -c 9,1 2009.csv | csvsort -r -l | head -n 11

# Using csvlook
csvcut -c 9,1 2009.csv | csvsort -r -l | csvlook

# Save our work
csvcut -c 9,1 2009.csv | csvsort -r -l > 2009_ranking.csv
###############################
# Adding another year of data #
###############################
# TODO: this section of the script has not been written yet.
# (Kept as a comment so the placeholder is not executed as a command.)
###############
# Wrapping up #
###############
# TODO: this section of the script has not been written yet.
# (Kept as a comment so the placeholder is not executed as a command.)
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.