Last active
December 4, 2017 13:42
-
-
Save Coconuthack/51b01d0a147b5db51381e2f0bcce21b1 to your computer and use it in GitHub Desktop.
Download Data for Practicum 5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the UCI "Adult" census income dataset into temporary files, load
# both splits into pandas DataFrames, and derive 0/1 income labels.
import tempfile
import urllib.request as urlreq

import pandas as pd

# The temporary files are intentionally kept open at module level: a
# NamedTemporaryFile is deleted as soon as it is closed, and the DataFrames
# below (plus any later code using `train_file.name` / `test_file.name`)
# need the downloaded data to remain on disk.
# NOTE(review): writing to a NamedTemporaryFile by name while it is still open
# is not supported on Windows; this script assumes a POSIX-like platform.
train_file = tempfile.NamedTemporaryFile()
test_file = tempfile.NamedTemporaryFile()

urlreq.urlretrieve(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
    train_file.name,
)
urlreq.urlretrieve(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test",
    test_file.name,
)

# Column names are supplied explicitly via `names=`, so pandas treats every
# row of the files as data rather than looking for a header row.
CSV_COLUMNS = [
    "age", "workclass", "fnlwgt", "education", "education_num",
    "marital_status", "occupation", "relationship", "race", "gender",
    "capital_gain", "capital_loss", "hours_per_week", "native_country",
    "income_bracket",
]

# skipinitialspace=True strips the blank that follows each comma delimiter.
df_train = pd.read_csv(
    train_file.name,
    names=CSV_COLUMNS,
    skipinitialspace=True,
)
# The first line of the test file is skipped (presumably a non-data header
# line -- confirm against the downloaded file).
df_test = pd.read_csv(
    test_file.name,
    names=CSV_COLUMNS,
    skipinitialspace=True,
    skiprows=1,
)

# Binary target: 1 when the income bracket text contains ">50K", else 0.
# A literal substring match (regex=False) is used instead of equality so that
# any extra decoration around the bracket text still matches; na=False maps
# missing values to 0 instead of raising.
train_labels = (
    df_train["income_bracket"]
    .str.contains(">50K", regex=False, na=False)
    .astype(int)
)
test_labels = (
    df_test["income_bracket"]
    .str.contains(">50K", regex=False, na=False)
    .astype(int)
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment