Created
February 11, 2017 21:41
-
-
Save jmsword/661995b81a31a5b6a561d97b43c577fa to your computer and use it in GitHub Desktop.
Naive Bayes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB

# Script: fit a Gaussian Naive Bayes classifier that predicts the 'sex' column
# from the remaining numeric columns of ideal_weight.csv, then classify two
# hand-written samples.

# Reading the data straight from GitHub kept failing with
# 'pandas.io.common.CParserError: Error tokenizing data. C error: Expected 1
# fields in line 104, saw 3', so the data was copied into a local csv file.
df = pd.read_csv('ideal_weight.csv')

# Remove single quotes from column names
df.rename(columns=lambda x: x.replace("'", ""), inplace=True)

# Remove single quotes from sex column
df['sex'] = df['sex'].map(lambda x: x.replace("'", ""))

# Plot histogram of actual & ideal weight (both drawn on the same figure)
plt.figure()
df['actual'].hist()
df['ideal'].hist()
plt.show()

# Plot histogram of difference in weight
plt.figure()
df['diff'].hist()
plt.show()

# Make sex a categorical variable (replaces the labels with integer codes)
df['sex'] = pd.Categorical(df['sex']).codes

# Check to see if there are more females than males in the data
print(df.groupby('sex').describe())

# Create training & testing data: first 70% of rows train, the rest test.
# NOTE(review): the split is positional (no shuffling), so it assumes the
# rows in the csv are not ordered by sex -- confirm against the data.
train_set = int(len(df) * 0.7)
train = df[:train_set]
test = df[train_set:]

# Create variables to fit into classifier model.
# `.ix` was removed from pandas; `.iloc` is the positional equivalent.
# Columns from position 2 onward are used as features (presumably
# actual, ideal, diff -- verify against the csv header).
train_target = train['sex']
train_data = train.iloc[:, 2:]

# Classifier
clf = GaussianNB()

# Fit into model
clf.fit(train_data, train_target)

# Create variables to predict.
# Presumably 0 = female, 1 = male (Categorical codes follow sorted label
# order) -- verify against the labels in the csv.
test_target = test['sex']
test_data = test.iloc[:, 2:]
# Predictions for the hold-out rows; kept for inspection, not printed.
pred = clf.predict(test_data)

# scikit-learn expects 2-D input of shape (n_samples, n_features); a bare
# 1-D list is rejected. Each sample is wrapped in a one-row DataFrame that
# reuses the training column names so it matches what the model was fit on.
# Make first prediction
print(clf.predict(pd.DataFrame([[145, 160, -15]], columns=train_data.columns)))

# Make second prediction
print(clf.predict(pd.DataFrame([[160, 145, 15]], columns=train_data.columns)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment