Created January 3, 2017 17:59
(Python) Explore various evaluation metrics: accuracy, confusion matrix, precision, and recall; how these metrics can be combined to produce a cost of making an error; and precision-recall curves.
#explore precision and recall
import pandas as pd
import numpy as np

#the dataset consists of baby product reviews on Amazon.com
import sframe
products = sframe.SFrame('amazon_baby.gl/')

#clean the original data: remove punctuation, fill in N/A, remove neutral sentiment,
# perform a train/test split, produce word count matrix
def remove_punctuation(text):
    import string
    return text.translate(None, string.punctuation)
products['review_clean'] = products['review'].apply(remove_punctuation)
products = products.fillna('review', '')

#ignore all reviews with rating=3, and classify the remaining reviews as positive or negative
products = products[products['rating'] != 3]
products['sentiment'] = products['rating'].apply(lambda rating: +1 if rating > 3 else -1)

#split the dataset into training and test sets
train_data, test_data = products.random_split(0.8, seed=1)

#calculate the word-count matrix
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(token_pattern=r'\b\w+\b')
train_matrix = vectorizer.fit_transform(train_data['review_clean'])
test_matrix = vectorizer.transform(test_data['review_clean'])
print train_matrix

#scikit-learn LogisticRegression() training
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(train_matrix, train_data['sentiment'])

#measure accuracy on the test set
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_true=test_data['sentiment'].to_numpy(),
                          y_pred=model.predict(test_matrix))
print 'Test Accuracy: %s' % accuracy
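#Sanity check (a sketch, not part of the original gist): classification accuracy is
#just the fraction of test examples whose predicted label matches the true label,
#so the same number can be recovered with a direct comparison.
print (model.predict(test_matrix) == test_data['sentiment'].to_numpy()).mean()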
#use the majority class classifier as a baseline
baseline = len(test_data[test_data['sentiment'] == 1]) / float(len(test_data))
print 'Baseline accuracy : %s' % baseline

#confusion matrix
from sklearn.metrics import confusion_matrix
cmat = confusion_matrix(y_true=test_data['sentiment'].to_numpy(),
                        y_pred=model.predict(test_matrix),
                        labels=model.classes_)
print 'target_label | predicted_label | count'
print '--------------------------------------'
for i, target_label in enumerate(model.classes_):
    for j, predicted_label in enumerate(model.classes_):
        print '{0:^13} | {1:^15} | {2:5d}'.format(target_label,
                                                  predicted_label, cmat[i, j])
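#The description above mentions combining metrics into a cost of making an error.
#A minimal sketch, assuming illustrative (made-up) per-mistake costs: with
#labels=model.classes_ ordered [-1, +1], cmat[0, 1] counts false positives and
#cmat[1, 0] counts false negatives, so a weighted sum prices the classifier's errors.
false_positive_cost = 100.0  # assumed cost of acting on a review wrongly flagged positive
false_negative_cost = 1.0    # assumed cost of missing a genuinely positive review
cost_of_mistakes = false_positive_cost * cmat[0, 1] + false_negative_cost * cmat[1, 0]
print 'Cost of mistakes on the test set: %s' % cost_of_mistakes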
#compute the precision of the logistic regression classifier
from sklearn.metrics import precision_score
precision = precision_score(y_true=test_data['sentiment'].to_numpy(),
                            y_pred=model.predict(test_matrix))
print 'Precision on test data: %s' % precision

#compute the recall
from sklearn.metrics import recall_score
recall = recall_score(y_true=test_data['sentiment'].to_numpy(),
                      y_pred=model.predict(test_matrix))
print 'Recall on test data: %s' % recall
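#Sanity check (a sketch, assuming the [-1, +1] ordering of model.classes_ above):
#precision = TP / (TP + FP) and recall = TP / (TP + FN), so both can be recovered
#directly from the confusion matrix computed earlier.
true_pos = float(cmat[1, 1])
false_pos = float(cmat[0, 1])
false_neg = float(cmat[1, 0])
print 'Precision from cmat: %s' % (true_pos / (true_pos + false_pos))
print 'Recall from cmat: %s' % (true_pos / (true_pos + false_neg))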
#precision-recall tradeoff: vary the probability threshold used to call a review positive
def apply_threshold(probabilities, threshold):
    return sframe.SArray(probabilities).apply(lambda x: +1 if x > threshold else -1)
probabilities = model.predict_proba(test_matrix)[:, 1]
y_pred_05 = apply_threshold(probabilities, 0.5)
y_pred_09 = apply_threshold(probabilities, 0.9)
print sum(y_pred_05 == 1)  # 28745
print sum(y_pred_09 == 1)  # 25070
print precision_score(y_true=test_data['sentiment'].to_numpy(), y_pred=y_pred_05)  # 0.9494
print precision_score(y_true=test_data['sentiment'].to_numpy(), y_pred=y_pred_09)  # 0.9815
print recall_score(y_true=test_data['sentiment'].to_numpy(), y_pred=y_pred_05)  # 0.9714
print recall_score(y_true=test_data['sentiment'].to_numpy(), y_pred=y_pred_09)  # 0.8758
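#Raising the threshold from 0.5 to 0.9 predicts fewer positives, so precision rises
#(0.9494 -> 0.9815) while recall falls (0.9714 -> 0.8758). A sketch of one way to
#summarize the tradeoff in a single number (f1_score is standard scikit-learn, but
#not used in the original gist): the F1 score, the harmonic mean of precision and recall.
from sklearn.metrics import f1_score
print f1_score(y_true=test_data['sentiment'].to_numpy(), y_pred=y_pred_05)
print f1_score(y_true=test_data['sentiment'].to_numpy(), y_pred=y_pred_09)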
#precision-recall curve: sweep the threshold from 0.5 to 1.0
threshold_values = np.linspace(0.5, 1, num=101)
print threshold_values
precision_all = []
recall_all = []
for threshold in threshold_values:
    pred = apply_threshold(probabilities, threshold)
    precision_all.append(precision_score(y_true=test_data['sentiment'].to_numpy(), y_pred=pred))
    recall_all.append(recall_score(y_true=test_data['sentiment'].to_numpy(), y_pred=pred))
precision_all[100] = 1.0  # at threshold 1.0 nothing is predicted positive, so define precision as 1.0
# 0.710 is the smallest threshold value that achieves a precision of 0.965
# 8208 false negatives at that threshold
import matplotlib.pyplot as plt
def plot_pr_curve(precision, recall, title):
    plt.rcParams['figure.figsize'] = 7, 5
    plt.locator_params(axis='x', nbins=5)
    plt.plot(precision, recall, 'b-', linewidth=2.0)
    plt.title(title)
    plt.xlabel('Precision')
    plt.ylabel('Recall')
    plt.show()
plot_pr_curve(precision_all, recall_all, 'Precision recall curve')
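#Cross-check (a sketch, not part of the original gist): scikit-learn can trace the same
#tradeoff directly from the probability scores with precision_recall_curve, which
#evaluates one point per distinct score instead of a fixed grid of thresholds.
from sklearn.metrics import precision_recall_curve
precision_sk, recall_sk, thresholds_sk = precision_recall_curve(
    test_data['sentiment'].to_numpy(), probabilities)
plot_pr_curve(precision_sk, recall_sk, 'Precision recall curve (sklearn thresholds)')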
#evaluate reviews for a specific search term ('baby' in the product name)
baby_reviews = test_data[test_data['name'].apply(lambda x: 'baby' in x.lower())]
baby_matrix = vectorizer.transform(baby_reviews['review_clean'])
probabilities = model.predict_proba(baby_matrix)[:, 1]
precision_baby = []
recall_baby = []
for threshold in threshold_values:
    pred = apply_threshold(probabilities, threshold)
    precision_baby.append(precision_score(y_true=baby_reviews['sentiment'].to_numpy(), y_pred=pred))
    recall_baby.append(recall_score(y_true=baby_reviews['sentiment'].to_numpy(), y_pred=pred))
precision_baby[100] = 1.0  # at threshold 1.0 nothing is predicted positive, so define precision as 1.0
# 0.735 is the smallest threshold value that achieves a precision of 0.965
plot_pr_curve(precision_baby, recall_baby, "Precision-Recall (Baby)")