Skip to content

Instantly share code, notes, and snippets.

@simonkamronn
Created March 1, 2017 16:25
Show Gist options
  • Save simonkamronn/e846d88b8660f1aba7edbeca9afa1bd9 to your computer and use it in GitHub Desktop.
Save simonkamronn/e846d88b8660f1aba7edbeca9afa1bd9 to your computer and use it in GitHub Desktop.
Bland-Altman plot in Bokeh with vertical histogram and normal fit on the right y-axis
from scipy.stats import norm, shapiro, kstest, anderson
import bokeh.plotting as bplt
from bokeh import layouts
from bokeh.charts import Histogram, Scatter
from bokeh.models import Span
import pandas as pd
import numpy as np
def vertical_histogram(y):
vhist, vedges = np.histogram(y, bins=20)
vzeros = np.zeros(len(vedges)-1)
vmax = max(vhist)*1.1
pv = bplt.figure(toolbar_location=None, plot_width=200, plot_height=400, x_range=(0, vmax),
min_border=10, y_axis_location="right")
pv.ygrid.grid_line_color = None
pv.xaxis.major_label_orientation = np.pi/4
pv.background_fill_color = "#fafafa"
# Plot histogram
pv.quad(left=0, bottom=vedges[:-1], top=vedges[1:], right=vhist, color="white", line_color="#3A5785")
# Normal fit
mu, sigma = norm.fit(y)
xp = np.linspace(y.min(), y.max(), 100)
pdf = norm.pdf(xp, mu, sigma)
pdf = (vhist.max()-1)*pdf/pdf.max()
# Plot pdf of fit
pv.line(pdf, xp, line_color="#D95B43", line_width=8, alpha=0.7)
return pv
def bland_altman(df, s1, s2, color=None, marker=None, log_transform=True):
df = df.copy().dropna()
if log_transform:
df[s1] = np.log2(df[s1])
df[s2] = np.log2(df[s2])
# Calc average and difference
df = df.assign(x=(df[s1] + df[s2])/2, y=df[s1] - df[s2])
# Test for normality
print('Shapiro Wilk\n stats: {}, p: {}'.format(*shapiro(df['y'].values)))
print('Kolmogorov-Smirnov\n stats: {}, p: {}'.format(*kstest(df['y'].values, 'norm')))
print('Anderson-Darling\n stats: {}, critical_values: {}'.format(*anderson(df['y'].values, 'norm')))
# Make plots
p = Scatter(df, x='x', y='y', color=color, marker=marker, title='Bland-Altman',
plot_width=700, plot_height=400, toolbar_location="above")
mean_y = Span(location=df['y'].mean(),
dimension='width', line_color='green',
line_dash='dashed', line_width=3)
std_y_upper = Span(location=df['y'].mean() + df['y'].std() * 1.96,
dimension='width', line_color='red',
line_dash='dashed', line_width=3)
std_y_lower = Span(location=df['y'].mean() - df['y'].std() * 1.96,
dimension='width', line_color='red',
line_dash='dashed', line_width=3)
p.add_layout(mean_y)
p.add_layout(std_y_upper)
p.add_layout(std_y_lower)
p.xaxis.axis_label = 'Average'
p.yaxis.axis_label = 'Difference ({} - {})'.format(s1, s2)
p.legend.location = 'top_left'
# Create histogram and norm fit
pv = vertical_histogram(df['y'])
p = layouts.Row(p, pv)
return p
@simonkamronn
Copy link
Author

Example

@Masqueration
Copy link

This is great, spared me a lot of time. Thank you very much!
I was asked to also modify the dots sizes according to their # of repititions (Integers, so you don't see when there are multiple on the same x,y), if its any good I will gladly share.

@simonkamronn
Copy link
Author

Example in Altair/Vega

df = df.assign(y_res_upper=df.y_res.mean() + df.y_res.std()*1.96,
               y_res_lower=df.y_res.mean() - df.y_res.std()*1.96)

alt.hconcat(
    alt.layer(
        (alt.Chart(data=df, title='Bland-Altman plot')
         .mark_circle()
         .encode(x=alt.X('y_avg', axis=alt.Axis(title='Average')),
                 y=alt.Y('y_res', axis=alt.Axis(title='Residual')),
                 color=alt.Color('treatment:N', legend=alt.Legend(title='Treatment')),
                 size=alt.value(100))),
        alt.Chart(df).mark_rule().encode(y='y_res_upper'),
        alt.Chart(df).mark_rule().encode(y='y_res_lower'),
        alt.Chart(df).mark_rule(strokeDash=[4, 2], strokeWidth=2).encode(y='mean(y_res)')),

    (alt.Chart(df, width=100)
     .mark_bar()
     .encode(y=alt.Y('y_res', bin=alt.BinParams(base=20, divide=[5, 10]), axis=None),
             x=alt.X('y_avg', aggregate='count', axis=None)))
)

@simonkamronn
Copy link
Author

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment