Last active
August 29, 2015 14:22
-
-
Save Zolmeister/dcdf1a85e491e8aaf8d5 to your computer and use it in GitHub Desktop.
A/B test calculations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################ | |
# A/B test calculations # | |
# # | |
# Based on Stats Engine # | |
# //pages.optimizely.com/rs/optimizely/images/stats_engine_technical_paper.pdf # | |
################################################################################ | |
################## | |
# Helper Methods # | |
################## | |
Z_MAX = 6 # Maxium +/- z value | |
# | |
# probability of normal z value | |
# Adapted from a polynomial approximation in: | |
# Ibbetson D, Algorithm 209 Collected Algorithms of the CACM 1963 p. 616 | |
# Note: This routine has six digit accuracy, so it is only useful for absolute | |
# z values <= 6. For z values > to 6.0, poz() returns 0.0 | |
# | |
# source: https://www.fourmilab.ch/rpkp/experiments/analysis/zCalc.html | |
# | |
poz = (z) -> | |
y = undefined | |
x = undefined | |
w = undefined | |
if z is 0.0 | |
x = 0.0 | |
else | |
y = 0.5 * Math.abs(z) | |
if y > Z_MAX * 0.5 | |
x = 1.0 | |
else if y < 1.0 | |
w = y * y | |
x = ((((((((0.000124818987 * w \ | |
- 0.001075204047) * w + 0.005198775019) * w \ | |
- 0.019198292004) * w + 0.059054035642) * w \ | |
- 0.151968751364) * w + 0.319152932694) * w \ | |
- 0.531923007300) * w + 0.797884560593) * y * 2.0 | |
else | |
y -= 2.0 | |
x = (((((((((((((-0.000045255659 * y \ | |
+ 0.000152529290) * y - 0.000019538132) * y \ | |
- 0.000676904986) * y + 0.001390604284) * y \ | |
- 0.000794620820) * y - 0.002034254874) * y \ | |
+ 0.006549791214) * y - 0.010557625006) * y \ | |
+ 0.011630447319) * y - 0.009279453341) * y \ | |
+ 0.005353579108) * y - 0.002141268741) * y \ | |
+ 0.000535310849) * y + 0.999936657524 | |
if z > 0.0 | |
return (x + 1.0) * 0.5 | |
else | |
return (1.0 - x) * 0.5 | |
# | |
# Compute critical normal z value to produce given p. | |
# We just do a bisection search for a value within CHI_EPSILON, | |
# relying on the monotonicity of pochisq(). | |
# | |
# source: https://www.fourmilab.ch/rpkp/experiments/analysis/zCalc.html | |
# | |
critz = (p) -> | |
Z_EPSILON = 0.000001 # Accuracy of z approximation | |
minz = -Z_MAX | |
maxz = Z_MAX | |
zval = 0.0 | |
pval = undefined | |
if p < 0.0 or p > 1.0 | |
return -1 | |
while maxz - minz > Z_EPSILON | |
pval = poz(zval) | |
if pval > p | |
maxz = zval | |
else | |
minz = zval | |
zval = (maxz + minz) * 0.5 | |
zval | |
proportionTrueNull = (results) -> | |
trueNull = _.sum results, ({isSignificant, xBar, yBar}) -> | |
if xBar > yBar and isSignificant | |
return 1 | |
else | |
return 0 | |
trueNull / results.length | |
############# | |
# Constants # | |
############# | |
tau = 0.5 # ???????? - free variable in significance | |
FDRthreshold = 0.10 | |
errorRate = 0.05 | |
data = | |
kpi1: | |
control: | |
rate: 0.2 | |
visitors: 10000 | |
var1: | |
rate: 0.22 | |
visitors: 10000 | |
kpi2: | |
control: | |
rate: 0.01 | |
visitors: 10000 | |
var1: | |
rate: 0.02 | |
visitors: 10000 | |
N = Object.keys(data.kpi1).length # number of a/b tests | |
####################### | |
# Conclusive Results # | |
# See (2) in paper # | |
####################### | |
conclusiveResults _.map data (data, kpi) -> | |
# You can imagine looking at more variables here too | |
xBar = kpi.control.rate | |
yBar = kpi.var1.rate | |
n = kpi.control.visitors + kpi.var1.visitors | |
thetaHat = yBar - xBar | |
alpha = errorRate | |
V = 2 * (xBar * (1 - xBar) + yBar * (1 - yBar)) / n | |
pHat = Math.sqrt( | |
(2 * Math.log(1 / alpha) - Math.log(V / (V + tau))) * | |
(V * (V + tau) / tau) | |
) | |
isSignificant = Math.abs(thetaHat) > pHat | |
{ | |
isSignificant | |
pHat | |
kpi | |
xBar | |
yBar | |
} | |
###################### | |
# Actionable Results # | |
# See (4) in paper # | |
###################### | |
FDRresults = _.map _.sortBy(conclusiveResults, 'pHat'), (result, index) -> | |
{pHat} = result | |
i = index + 1 | |
piZero = proportionTrueNull(conclusiveResults) | |
FDR = piZero * pHat / (i * N) | |
_.default {FDR}, result | |
# We can take action on these results! | |
significantResults = _.filter FDRresults, ({FDR}) -> | |
(1 - FDR) > FDRthreshold | |
######################## | |
# Confidence Intervals # | |
######################## | |
confidenceIntervals = _.map FDRresults, (result) -> | |
{xBar, yBar, isSignificant} = result | |
# Calculate std. deviation | |
mu = (xBar + yBar) / 2 | |
delta = Math.sqrt( | |
1 / 2 * | |
_.sum [xBar, yBar], (xi) -> | |
Math.pow(xi - mu, 2) | |
) | |
# See section 3.5 in paper | |
q = errorRate | |
m = significantResults.length | |
coverageLevel = if isSignificant | |
then (1 - q * m / N) | |
else (1 - q * (m + 1) / N) | |
z = critz(coverageLevel) | |
{ | |
low: yBar - z * delta / Math.sqrt(N) | |
high: yBar + z * delta / Math.sqrt(N) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment