Skip to content

Instantly share code, notes, and snippets.

@ScottSmith95
Last active March 4, 2018 00:15
Show Gist options
  • Save ScottSmith95/0ef69bc970d2051f24166e21a8fd658e to your computer and use it in GitHub Desktop.
Save ScottSmith95/0ef69bc970d2051f24166e21a8fd658e to your computer and use it in GitHub Desktop.
Reformat WDI data extract for Tableau.
# -*- coding: utf-8 -*-
import pandas as pd
# Reshape a World Development Indicators (WDI) extract so that every row is
# one (country, year) pair and every indicator is its own column, then save
# the result as CSV.
in_file_name = 'WDIData.csv'  # Make sure this file is in the same folder.

data = pd.read_csv(in_file_name)

# WDI marks missing observations with "..". Work out which columns contain no
# data at all (only NaN and/or "..") *before* the placeholder is rewritten:
# once ".." has been turned into the string "Null", dropna() no longer treats
# those cells as missing, so the empty columns would never be removed.
is_missing = data.isna() | data.eq('..')
data = data.loc[:, ~is_missing.all(axis=0)]  # Drop columns with exclusively missing data.

# Replace WDI's ".." for missing data with "Null" in the columns that remain.
data = data.replace('..', 'Null')

# Remove country code, series code (2nd and 4th of the remaining columns).
data = data.drop(data.columns[[1, 3]], axis=1)

# Remove column name part after brackets. Turns "1960 [YR1960]" into "1960".
data = data.rename(columns=lambda x: x.split(' [')[0])

# Multiindex by indicator then country.
#
# New format
#   Indicator  | Country | 2000  | 2001  | ...
#   GDP        | USA     | $1    | $1.50
#              | Ukraine | $1000 | $2000
#   Birth Rate | USA     | X1    | X2
#              | Ukraine | Y1    | Y2
#   etc...
multi = data.set_index(['Series Name', 'Country Name'])

# Stack the year columns into rows indexed under indicator then country.
#
# New format
#   Indicator  | Country | Year | Value
#   GDP        | USA     | 2000 | $1
#              |         | 2001 | $1.50
#              | Ukraine | 2000 | $1000
#              |         | 2001 | $2000
#   Birth Rate | USA     | 2000 | X1
#              |         | 2001 | X2
#              | Ukraine | 2000 | Y1
#              |         | 2001 | Y2
#   etc...
stacked = multi.stack()

# Move indicators to columns.
#
# New format
#   Indicators:       | GDP   | Birth Rate
#   Country | Year    | Value | Value
#   USA     | 2000    | $1    | X1
#           | 2001    | $1.50 | X2
#   Ukraine | 2000    | $1000 | Y1
#           | 2001    | $2000 | Y2
#   etc...
unstack = stacked.unstack(level='Series Name')

# The stacked year level has no name of its own; label both index levels so
# the CSV gets proper header cells for them.
unstack.index.names = ["Country Name", "Year"]
unstack.to_csv('WDIData_reformat.csv')  # Save to file.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment