jmsword · February 11, 2017 21:41
diff --git a/naive_bayes.py b/naive_bayes.py
 import pandas as pd
 import matplotlib.pyplot as plt
 from sklearn.naive_bayes import GaussianNB

 #I kept getting this error 'pandas.io.common.CParserError: Error tokenizing data. C error: Expected 1 fields in line 104, saw 3'
 #when trying to read in the data from GitHub so I just copied the data into a csv file and saved it locally
 df = pd.read_csv('ideal_weight.csv')

 #Remove single quotes from column names
 df.rename(columns=lambda x: x.replace("'", ""), inplace=True)

 #Remove single quotes from sex column
 df['sex'] = df['sex'].map(lambda x: x.replace("'", ""))

 #Plot histogram of actual & ideal weight (both series are drawn on the same figure)
 plt.figure()
 a = df['actual'].hist()
 i = df['ideal'].hist()
 plt.show()

 #Plot histogram of difference in weight
 plt.figure()
 d = df['diff'].hist()
 plt.show()

 #Make sex a categorical variable (replaces each label with its integer category code)
 df['sex'] = pd.Categorical(df['sex']).codes

 #Check to see if there are more females than males in the data
 print(df.groupby('sex').describe())

 #Create training & testing data: the first 70% of rows train, the remaining rows test
 #(the split is positional, rows are not shuffled)
 train_set = int(len(df) * 0.7)
 train = df[:train_set]
 test = df[train_set:]

 #Create variables to fit into classifier model
 #Features are every column from position 2 onward; .iloc is used because the
 #.ix indexer was removed in pandas 1.0
 train_target = train['sex']
 train_data = train.iloc[:, 2:]

 #Classifier
 clf = GaussianNB()
 #Fit into model
 clf.fit(train_data, train_target)

 #Create variables to predict: 0 = female, 1 = male
 test_target = test['sex']
 test_data = test.iloc[:, 2:]
 pred = clf.predict(test_data)

 #predict() expects a 2D input of shape (n_samples, n_features), so each single
 #sample is wrapped in an outer list
 #NOTE(review): values presumably follow the feature column order (actual, ideal, diff) - confirm against the csv
 #Make first prediction
 print(clf.predict([[145, 160, -15]]))
 #Make second prediction
 print(clf.predict([[160, 145, 15]]))
	import pandas as pd
	import matplotlib.pyplot as plt
	from sklearn.naive_bayes import GaussianNB

	#I kept getting this error 'pandas.io.common.CParserError: Error tokenizing data. C error: Expected 1 fields in line 104, saw 3'
	#when trying to read in the data from GitHub so I just copied the data into a csv file and saved it locally
	df = pd.read_csv('ideal_weight.csv')

	#Remove single quotes from column names
	df.rename(columns=lambda x: x.replace("'", ""), inplace=True)

	#Remove single quotes from sex column
	df['sex'] = df['sex'].map(lambda x: x.replace("'", ""))

	#Plot histogram of actual & ideal weight (both series are drawn on the same figure)
	plt.figure()
	a = df['actual'].hist()
	i = df['ideal'].hist()
	plt.show()

	#Plot histogram of difference in weight
	plt.figure()
	d = df['diff'].hist()
	plt.show()

	#Make sex a categorical variable (replaces each label with its integer category code)
	df['sex'] = pd.Categorical(df['sex']).codes

	#Check to see if there are more females than males in the data
	print(df.groupby('sex').describe())

	#Create training & testing data: the first 70% of rows train, the remaining rows test
	#(the split is positional, rows are not shuffled)
	train_set = int(len(df) * 0.7)
	train = df[:train_set]
	test = df[train_set:]

	#Create variables to fit into classifier model
	#Features are every column from position 2 onward; .iloc is used because the
	#.ix indexer was removed in pandas 1.0
	train_target = train['sex']
	train_data = train.iloc[:, 2:]

	#Classifier
	clf = GaussianNB()
	#Fit into model
	clf.fit(train_data, train_target)

	#Create variables to predict: 0 = female, 1 = male
	test_target = test['sex']
	test_data = test.iloc[:, 2:]
	pred = clf.predict(test_data)

	#predict() expects a 2D input of shape (n_samples, n_features), so each single
	#sample is wrapped in an outer list
	#NOTE(review): values presumably follow the feature column order (actual, ideal, diff) - confirm against the csv
	#Make first prediction
	print(clf.predict([[145, 160, -15]]))
	#Make second prediction
	print(clf.predict([[160, 145, 15]]))