### 20160215 Decision Boundaries
### Prepare python
%pylab inline
import numpy as np
import pandas as pd
import statsmodels.api as sm
import scipy.stats as stats
import neurolab as nl
from sklearn.datasets import make_classification
from sklearn import neighbors, tree, svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
### Helper functions for plotting
# A function to plot the observations on a scatterplot
def plotcases(ax):
    ax.scatter(xdf['x1'], xdf['x2'], c=xdf['y'], cmap=cm.coolwarm, alpha=0.6, s=20, lw=0.4)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.tick_params(axis='both', which='both', left='off', right='off', top='off', bottom='off',
                   labelleft='off', labelright='off', labeltop='off', labelbottom='off')

# A function to draw the decision boundary and colour the regions
def plotboundary(ax, Z):
    ax.pcolormesh(xx, yy, Z, cmap=cm.coolwarm, alpha=0.1)
    ax.contour(xx, yy, Z, [0.5], linewidths=0.75, colors='k')
### Generate training data
X, y = make_classification(n_samples=100, n_features=2, n_informative=2, n_redundant=0, n_repeated=0,
                           n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.02, class_sep=0.5,
                           hypercube=True, shift=0.0, scale=0.5, shuffle=True, random_state=5)
xdf = pd.DataFrame(X, columns=['x1','x2'])
xdf['y'] = y
### Create plot canvas with 7x4 subplots
fig = plt.figure(figsize=(12,16), dpi=1600)
plt.subplots_adjust(hspace=.5)
nrows = 7
ncols = 4
gridsize = (nrows, ncols)
### 1. Plot the problem ###
ax0 = plt.subplot2grid(gridsize, [0,0])
plotcases(ax0)
ax0.title.set_text("Problem")

# Take boundaries from the first plot and define a mesh of points for plotting decision spaces
x_min, x_max = plt.xlim()
y_min, y_max = plt.ylim()
nx, ny = 100, 100   # sets the number of points in the mesh
xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx),
                     np.linspace(y_min, y_max, ny))
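# The same recipe repeats for every classifier below: fit on (x1, x2), score each
# mesh point, reshape to the grid, then draw the boundary and the cases. A purely
# illustrative helper (the sections below deliberately spell it out longhand):
def plot_model(ax, clf, title):
    clf.fit(xdf[['x1','x2']], xdf['y'])
    Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1].reshape(xx.shape)
    ax.title.set_text(title)
    plotboundary(ax, Z)
    plotcases(ax)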
############# kNN #################
### 2. kNN with k=5
knn5 = neighbors.KNeighborsClassifier(n_neighbors=5)
knn5.fit(xdf[['x1','x2']], xdf['y'])
Z = knn5.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [1,0])
ax.title.set_text("kNN, k=5")
plotboundary(ax, Z)
plotcases(ax)
### 3. kNN with k=15
knn15 = neighbors.KNeighborsClassifier(n_neighbors=15)
knn15.fit(xdf[['x1','x2']], xdf['y'])
Z = knn15.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [1,1])
ax.title.set_text("kNN, k=15")
plotboundary(ax, Z)
plotcases(ax)
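# Larger k averages over more neighbours, giving a smoother boundary; distance
# weighting is a middle ground. An illustrative variant, not plotted above:
knn15w = neighbors.KNeighborsClassifier(n_neighbors=15, weights='distance')
knn15w.fit(xdf[['x1','x2']], xdf['y'])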
########### Logistic regression ##########
### 4. Logistic regression - simple linear
formula = ('y ~ x1 + x2')
LRm = sm.GLM.from_formula(formula=formula, data=xdf, family=sm.families.Binomial()).fit()

XX = pd.DataFrame(np.vstack([xx.ravel(), yy.ravel()]).T)
XX.columns = xdf.columns[0:2]
Z = LRm.predict(XX)
Z = Z.reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [2,0])
ax.title.set_text("Logistic Regression\n simple")
plotboundary(ax, Z)
plotcases(ax)
### 5. Logistic regression - with basic polynomials
formula = ('y ~ x1 + x2 + I(x1**2) + I(x2**2)')
LRm = sm.GLM.from_formula(formula=formula, data=xdf, family=sm.families.Binomial()).fit()
Z = LRm.predict(XX)
Z = Z.reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [2,1])
ax.title.set_text("Logistic Regression\n basic polynomials")
plotboundary(ax, Z)
plotcases(ax)
### 6. Logistic regression - with polynomials & interactions
formula = ('y ~ x1 + x2 + x1*x2 + I(x1**2) + I(x2**2) + x1*I(x1**2) + x2*I(x2**2)\
           + I(x1**3) + I(x2**3) + x1*I(x1**3) + x2*I(x2**3)')
LRm = sm.GLM.from_formula(formula=formula, data=xdf, family=sm.families.Binomial()).fit()
Z = LRm.predict(XX)
Z = Z.reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [2,2])
ax.title.set_text("Logistic Regression\n polynomials & interactions")
plotboundary(ax, Z)
plotcases(ax)
### 7. Logistic regression - with polynomials & interactions, regularised
# fit_regularized applies an L1 penalty scaled by alpha (smaller alpha = weaker regularisation)
formula = ('y ~ x1 + x2 + x1*x2 + I(x1**2) + I(x2**2) + x1*I(x1**2) + x2*I(x2**2)\
           + I(x1**3) + I(x2**3) + x1*I(x1**3) + x2*I(x2**3)')
LRm = sm.Logit.from_formula(formula=formula, data=xdf).fit_regularized(alpha=0.5, maxiter=200)
Z = LRm.predict(XX)
Z = Z.reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [2,3])
ax.title.set_text("Logistic Regression\n polynomials & interactions\n regularised")
plotboundary(ax, Z)
plotcases(ax)
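# For comparison, scikit-learn's LogisticRegression fits an L2-regularised model,
# where C is the *inverse* regularisation strength. A minimal sketch, assuming
# PolynomialFeatures to build the cubic terms (not used in the plots above):
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=3, include_bias=False)
Xpoly = poly.fit_transform(xdf[['x1','x2']])
lr = LogisticRegression(C=1.0).fit(Xpoly, xdf['y'])
# Z = lr.predict_proba(poly.transform(np.c_[xx.ravel(), yy.ravel()]))[:, 1].reshape(xx.shape)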
##### TREE METHODS ########
### 8. Decision tree
dtree = tree.DecisionTreeClassifier()
dtree.fit(xdf[['x1','x2']], xdf['y'])
Z = dtree.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [3,0])
ax.title.set_text("Decision tree")
plotboundary(ax, Z)
plotcases(ax)
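# The unpruned tree above can memorise the label noise; capping the depth trades
# fit for a smoother boundary. An illustrative variant, not plotted above:
dtree3 = tree.DecisionTreeClassifier(max_depth=3)
dtree3.fit(xdf[['x1','x2']], xdf['y'])
# Z = dtree3.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1].reshape(xx.shape)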
### 9. Random forest
rf = RandomForestClassifier()
rf.fit(xdf[['x1','x2']], xdf['y'])
Z = rf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [3,1])
ax.title.set_text("Random forest")
plotboundary(ax, Z)
plotcases(ax)
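# Averaging more trees smooths the probability surface (older scikit-learn
# versions defaulted to only 10 estimators). An illustrative variant:
rf100 = RandomForestClassifier(n_estimators=100)
rf100.fit(xdf[['x1','x2']], xdf['y'])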
###### SUPPORT VECTOR MACHINES ###############
### 10. SVM with 4th-order polynomial kernel
svc2 = svm.SVC(kernel='poly', degree=4, probability=True)
svc2.fit(xdf[['x1','x2']], xdf['y'])
Z = svc2.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [4,0])
ax.title.set_text("SVM\n 4th order polynomials")
plotboundary(ax, Z)
plotcases(ax)
### 11. SVM with radial basis function kernel
svc3 = svm.SVC(kernel='rbf', probability=True)
svc3.fit(xdf[['x1','x2']], xdf['y'])
Z = svc3.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [4,1])
ax.title.set_text("SVM\n Radial basis function")
plotboundary(ax, Z)
plotcases(ax)
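# For the RBF kernel, gamma sets how local the boundary is and C trades margin
# width against training errors. An illustrative, more flexible variant:
svc_tight = svm.SVC(kernel='rbf', gamma=10.0, C=1.0, probability=True)
svc_tight.fit(xdf[['x1','x2']], xdf['y'])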
####### Bayesian & probabilistic methods ############
### 12. Gaussian Naive Bayes
gnb = GaussianNB()
gnb.fit(xdf[['x1','x2']], xdf['y'])
Z = gnb.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:,1].reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [5,0])
ax.title.set_text("Gaussian Naive Bayes")
plotboundary(ax, Z)
plotcases(ax)
### 13. Gaussian Bayes, joint distribution built explicitly over the grid
## Assumes each variable is normally distributed within each class
# Specify the prior p(class); assume it is constant across all regions
pClass1 = len(xdf[xdf['y']==1]) / float(len(xdf))
pClass2 = len(xdf[xdf['y']==0]) / float(len(xdf))

## Create the likelihood distribution p(datapoint | class)
## For each class, estimate the mean and spread of both variables

# Collect means for both variables, for both classes
Mux1C1 = xdf['x1'][xdf['y']==1].mean()
Mux2C1 = xdf['x2'][xdf['y']==1].mean()
Mux1C2 = xdf['x1'][xdf['y']==0].mean()
Mux2C2 = xdf['x2'][xdf['y']==0].mean()

# Collect standard deviations for both variables, for both classes
# (stats.norm takes the standard deviation as its scale parameter)
Sdx1C1 = xdf['x1'][xdf['y']==1].std()
Sdx2C1 = xdf['x2'][xdf['y']==1].std()
Sdx1C2 = xdf['x1'][xdf['y']==0].std()
Sdx2C2 = xdf['x2'][xdf['y']==0].std()

# evaluate the densities on the same grid used for plotting, so Z lines up with xx, yy
rangex1 = np.linspace(x_min, x_max, num=nx)
rangex2 = np.linspace(y_min, y_max, num=ny)

# generate Gaussian densities for x1 and x2 within both class1 and class2
C1x1 = stats.norm(Mux1C1, Sdx1C1).pdf(rangex1)
C1x2 = stats.norm(Mux2C1, Sdx2C1).pdf(rangex2)
C2x1 = stats.norm(Mux1C2, Sdx1C2).pdf(rangex1)
C2x2 = stats.norm(Mux2C2, Sdx2C2).pdf(rangex2)

# multiply the two marginals (outer product) to get a likelihood surface per class.
# Note this still treats x1 and x2 as independent within each class; a full-covariance
# alternative is sketched after this block.
pLikelihoodC1Joint = np.dot(C1x2[:, None], C1x1[None, :])
pLikelihoodC2Joint = np.dot(C2x2[:, None], C2x1[None, :])

# Finally, combine with the priors to score each class: p(class | data).
# The denominator is shared by both classes, so it cancels in the comparison below.
ScoreClass1GivenData = pLikelihoodC1Joint * pClass1 / (np.sum(pLikelihoodC1Joint * pClass1) + np.sum(pLikelihoodC2Joint * pClass2))
ScoreClass2GivenData = pLikelihoodC2Joint * pClass2 / (np.sum(pLikelihoodC1Joint * pClass1) + np.sum(pLikelihoodC2Joint * pClass2))

## For a given point, we allocate to whichever class scores higher:
Z = ScoreClass1GivenData > ScoreClass2GivenData
Z = Z.reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [5,1])
ax.title.set_text("Bayes: Full joint\n (Gaussian) distribution")
plotboundary(ax, Z)
plotcases(ax)
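# A sketch of a genuinely full-covariance joint, using scipy's multivariate_normal
# (illustrative only; the plot above uses the product of marginals):
mvnC1 = stats.multivariate_normal(xdf[['x1','x2']][xdf['y']==1].mean(),
                                  xdf[['x1','x2']][xdf['y']==1].cov())
mvnC2 = stats.multivariate_normal(xdf[['x1','x2']][xdf['y']==0].mean(),
                                  xdf[['x1','x2']][xdf['y']==0].cov())
# Zfull = (mvnC1.pdf(np.c_[xx.ravel(), yy.ravel()]) * pClass1 >
#          mvnC2.pdf(np.c_[xx.ravel(), yy.ravel()]) * pClass2).reshape(xx.shape)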
### 14. Bayes with joint, nonparametric (kernel-density-estimated) distributions
## = does NOT assume normality
# The prior p(class) is the same as above

# Create the likelihood distribution p(datapoint | class)
# = estimate each class's joint density with a kernel density estimate
# (class 1 = y==1 and class 2 = y==0, matching the priors above)
dens_1 = sm.nonparametric.KDEMultivariate(data=xdf[['x1','x2']][xdf['y']==1],
                                          var_type='cc', bw='normal_reference')
dens_2 = sm.nonparametric.KDEMultivariate(data=xdf[['x1','x2']][xdf['y']==0],
                                          var_type='cc', bw='normal_reference')

# evaluate the class likelihoods over the plotting mesh
pLikelihoodC1Joint = dens_1.pdf(np.c_[xx.ravel(), yy.ravel()])
pLikelihoodC2Joint = dens_2.pdf(np.c_[xx.ravel(), yy.ravel()])

# Use Bayes' rule to get the posterior scores, then convert back to matrix form
ScoreClass1GivenData = pLikelihoodC1Joint * pClass1 / (np.sum(pLikelihoodC1Joint * pClass1) + np.sum(pLikelihoodC2Joint * pClass2))
ScoreClass2GivenData = pLikelihoodC2Joint * pClass2 / (np.sum(pLikelihoodC1Joint * pClass1) + np.sum(pLikelihoodC2Joint * pClass2))
ScoreClass1GivenData = ScoreClass1GivenData.reshape(xx.shape)
ScoreClass2GivenData = ScoreClass2GivenData.reshape(xx.shape)

# predict the most likely class at each point on the grid
Z = ScoreClass1GivenData > ScoreClass2GivenData

ax = plt.subplot2grid(gridsize, [5,2])
ax.title.set_text("Bayes: Full joint\n KDE distribution")
plotboundary(ax, Z)
plotcases(ax)
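# Bandwidth drives how wiggly the KDE boundary is; statsmodels also offers
# cross-validated bandwidth selection (slower, shown only as a sketch):
# dens_1cv = sm.nonparametric.KDEMultivariate(data=xdf[['x1','x2']][xdf['y']==1],
#                                             var_type='cc', bw='cv_ml')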
####### Neural networks #########
## NB: these take a while to train & the neurolab library can be a bit temperamental;
## this chunk of code can be deleted if desired
yarr = y.reshape(-1, 1)   # neurolab expects a 2D target array

## Neural net - 1 hidden layer, 4 hidden nodes
net4 = nl.net.newff([[np.min(X[:,0]), np.max(X[:,0])], [np.min(X[:,1]), np.max(X[:,1])]], [4, 1])
net4.trainf = nl.train.train_gd   # use gradient descent; bfgs is too buggy in neurolab
err = net4.train(X, yarr, show=100, goal=0.01)

Z = net4.sim(np.vstack([xx.ravel(), yy.ravel()]).T)
Z = Z.reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [6,0])
ax.title.set_text("Neural Net\n 1HL x 4 nodes")
plotboundary(ax, Z)
plotcases(ax)
## Neural net - 2 hidden layers, 4 hidden nodes each
net44 = nl.net.newff([[np.min(X[:,0]), np.max(X[:,0])], [np.min(X[:,1]), np.max(X[:,1])]], [4, 4, 1])
net44.trainf = nl.train.train_gd   # use gradient descent; bfgs is too buggy in neurolab
err = net44.train(X, yarr, show=100, goal=0.01, lr=0.01)

Z = net44.sim(np.vstack([xx.ravel(), yy.ravel()]).T)
Z = Z.reshape(xx.shape)

ax = plt.subplot2grid(gridsize, [6,1])
ax.title.set_text("Neural Net\n 2HL x 4 nodes each")
plotboundary(ax, Z)
plotcases(ax)
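# If scikit-learn >= 0.18 is available, MLPClassifier is a less temperamental
# alternative to neurolab (a sketch, not used in the plots above):
# from sklearn.neural_network import MLPClassifier
# mlp = MLPClassifier(hidden_layer_sizes=(4, 4), max_iter=2000).fit(X, y)
# Z = mlp.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1].reshape(xx.shape)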