dmorgan-github · July 15, 2014 19:17
diff --git a/f1.py b/f1.py
 import pandas as pd
 import numpy as np
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.metrics import f1_score

 # Minimal reproduction: fit a random forest on synthetic data, then print
 # the confusion matrix and the per-class F1 scores.
 target = 'e'
 features = ['a', 'b', 'c', 'd']
 cols = len(features)
 rows = 1000

 # Synthetic data: every feature is a random integer in [1, 5) and the
 # target is a random 0/1 label generated independently of the features.
 df = pd.DataFrame(np.random.randint(1, 5, (rows, cols)), columns=features)
 df[target] = np.random.randint(2, size=rows)

 # split the dataset: the first 75% of rows train, the following 25% test
 trainLen = int(rows*0.75)
 testLen = int(rows*0.25)
 # .iloc slices by position; the frame has the default 0..rows-1 index, so
 # this selects the same rows the old (now removed) .ix lookup did.
 training = df.iloc[:trainLen]
 test = df.iloc[trainLen:trainLen+testLen]

 X_train = training[features]
 y_train = training[target]
 X_test = test[features]
 y_test = test[target]

 clf = RandomForestClassifier()
 clf.fit(X_train, y_train)
 preds = clf.predict(X_test)

 # show confusion matrix (rows: actual labels, columns: predicted labels)
 ct = pd.crosstab(y_test, preds, rownames=['actual'], colnames=['preds'])
 # Parenthesised single-argument print works on both Python 2 and Python 3.
 print(ct)

 # show f1 score; average=None is passed so no averaging across classes is requested
 f1 = f1_score(y_test, preds, average=None)
 print(f1)
	import pandas as pd
	import numpy as np
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.metrics import f1_score

	# Minimal reproduction: fit a random forest on synthetic data, then print
	# the confusion matrix and the per-class F1 scores.
	target = 'e'
	features = ['a', 'b', 'c', 'd']
	cols = len(features)
	rows = 1000

	# Synthetic data: every feature is a random integer in [1, 5) and the
	# target is a random 0/1 label generated independently of the features.
	df = pd.DataFrame(np.random.randint(1, 5, (rows, cols)), columns=features)
	df[target] = np.random.randint(2, size=rows)

	# split the dataset: the first 75% of rows train, the following 25% test
	trainLen = int(rows*0.75)
	testLen = int(rows*0.25)
	# .iloc slices by position; the frame has the default 0..rows-1 index, so
	# this selects the same rows the old (now removed) .ix lookup did.
	training = df.iloc[:trainLen]
	test = df.iloc[trainLen:trainLen+testLen]

	X_train = training[features]
	y_train = training[target]
	X_test = test[features]
	y_test = test[target]

	clf = RandomForestClassifier()
	clf.fit(X_train, y_train)
	preds = clf.predict(X_test)

	# show confusion matrix (rows: actual labels, columns: predicted labels)
	ct = pd.crosstab(y_test, preds, rownames=['actual'], colnames=['preds'])
	# Parenthesised single-argument print works on both Python 2 and Python 3.
	print(ct)

	# show f1 score; average=None is passed so no averaging across classes is requested
	f1 = f1_score(y_test, preds, average=None)
	print(f1)