Created
March 1, 2017 16:25
-
-
Save simonkamronn/e846d88b8660f1aba7edbeca9afa1bd9 to your computer and use it in GitHub Desktop.
Bland-Altman plot in Bokeh with vertical histogram and normal fit on the right y-axis
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.stats import norm, shapiro, kstest, anderson | |
import bokeh.plotting as bplt | |
from bokeh import layouts | |
from bokeh.charts import Histogram, Scatter | |
from bokeh.models import Span | |
import pandas as pd | |
import numpy as np | |
def vertical_histogram(y): | |
vhist, vedges = np.histogram(y, bins=20) | |
vzeros = np.zeros(len(vedges)-1) | |
vmax = max(vhist)*1.1 | |
pv = bplt.figure(toolbar_location=None, plot_width=200, plot_height=400, x_range=(0, vmax), | |
min_border=10, y_axis_location="right") | |
pv.ygrid.grid_line_color = None | |
pv.xaxis.major_label_orientation = np.pi/4 | |
pv.background_fill_color = "#fafafa" | |
# Plot histogram | |
pv.quad(left=0, bottom=vedges[:-1], top=vedges[1:], right=vhist, color="white", line_color="#3A5785") | |
# Normal fit | |
mu, sigma = norm.fit(y) | |
xp = np.linspace(y.min(), y.max(), 100) | |
pdf = norm.pdf(xp, mu, sigma) | |
pdf = (vhist.max()-1)*pdf/pdf.max() | |
# Plot pdf of fit | |
pv.line(pdf, xp, line_color="#D95B43", line_width=8, alpha=0.7) | |
return pv | |
def bland_altman(df, s1, s2, color=None, marker=None, log_transform=True): | |
df = df.copy().dropna() | |
if log_transform: | |
df[s1] = np.log2(df[s1]) | |
df[s2] = np.log2(df[s2]) | |
# Calc average and difference | |
df = df.assign(x=(df[s1] + df[s2])/2, y=df[s1] - df[s2]) | |
# Test for normality | |
print('Shapiro Wilk\n stats: {}, p: {}'.format(*shapiro(df['y'].values))) | |
print('Kolmogorov-Smirnov\n stats: {}, p: {}'.format(*kstest(df['y'].values, 'norm'))) | |
print('Anderson-Darling\n stats: {}, critical_values: {}'.format(*anderson(df['y'].values, 'norm'))) | |
# Make plots | |
p = Scatter(df, x='x', y='y', color=color, marker=marker, title='Bland-Altman', | |
plot_width=700, plot_height=400, toolbar_location="above") | |
mean_y = Span(location=df['y'].mean(), | |
dimension='width', line_color='green', | |
line_dash='dashed', line_width=3) | |
std_y_upper = Span(location=df['y'].mean() + df['y'].std() * 1.96, | |
dimension='width', line_color='red', | |
line_dash='dashed', line_width=3) | |
std_y_lower = Span(location=df['y'].mean() - df['y'].std() * 1.96, | |
dimension='width', line_color='red', | |
line_dash='dashed', line_width=3) | |
p.add_layout(mean_y) | |
p.add_layout(std_y_upper) | |
p.add_layout(std_y_lower) | |
p.xaxis.axis_label = 'Average' | |
p.yaxis.axis_label = 'Difference ({} - {})'.format(s1, s2) | |
p.legend.location = 'top_left' | |
# Create histogram and norm fit | |
pv = vertical_histogram(df['y']) | |
p = layouts.Row(p, pv) | |
return p |
Author
simonkamronn
commented
Mar 1, 2017
This is great, spared me a lot of time. Thank you very much!
I was asked to also modify the dots sizes according to their # of repititions (Integers, so you don't see when there are multiple on the same x,y), if its any good I will gladly share.
Example in Altair/Vega
df = df.assign(y_res_upper=df.y_res.mean() + df.y_res.std()*1.96,
y_res_lower=df.y_res.mean() - df.y_res.std()*1.96)
alt.hconcat(
alt.layer(
(alt.Chart(data=df, title='Bland-Altman plot')
.mark_circle()
.encode(x=alt.X('y_avg', axis=alt.Axis(title='Average')),
y=alt.Y('y_res', axis=alt.Axis(title='Residual')),
color=alt.Color('treatment:N', legend=alt.Legend(title='Treatment')),
size=alt.value(100))),
alt.Chart(df).mark_rule().encode(y='y_res_upper'),
alt.Chart(df).mark_rule().encode(y='y_res_lower'),
alt.Chart(df).mark_rule(strokeDash=[4, 2], strokeWidth=2).encode(y='mean(y_res)')),
(alt.Chart(df, width=100)
.mark_bar()
.encode(y=alt.Y('y_res', bin=alt.BinParams(base=20, divide=[5, 10]), axis=None),
x=alt.X('y_avg', aggregate='count', axis=None)))
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment