-
-
Save IanniMuliterno/46506abe44d6817b726e6bb554852862 to your computer and use it in GitHub Desktop.
Basic illustration, in Perl, of the repeated significance testing error
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
use strict;
use warnings;
use Statistics::Gtest;
use Statistics::Distributions;
use Data::Dumper;

#
# Just set up some basics
#
# Every mock experiment below gives all variants the same conversion rate,
# so any result flagged as "significant" is a false positive.
#

# How many variants and participants to include
my $VARIANTS     = 2;
my $SAMPLE_SIZE  = 100_000;                     # participants per variant, on average
my $PARTICIPANTS = $SAMPLE_SIZE * $VARIANTS;    # total participants per experiment

# A significance test needs something to compare against, and the
# chi-square lookup below needs at least one degree of freedom.
die "Need at least 2 variants, got $VARIANTS\n" if $VARIANTS < 2;

# Conversion rate of mock experiments, in percent (compared against rand(100))
my $CONVERSION_RATE = 3;

# Chi-square critical values used as cutoffs for the G statistic.
# The observed table has one [hits, fails] row per variant, so a two-way
# table has (rows - 1) * (cols - 1) = $VARIANTS - 1 degrees of freedom.
# Looking the values up (instead of hard-coding them) keeps the cutoffs
# correct when $VARIANTS changes; for 2 variants this yields about 2.7055
# and 3.8415.
my $DEGREES_OF_FREEDOM = $VARIANTS - 1;
my $GTEST_CUTOFF_90 =
    Statistics::Distributions::chisqrdistr( $DEGREES_OF_FREEDOM, 0.10 );    # significance at .10
my $GTEST_CUTOFF_95 =
    Statistics::Distributions::chisqrdistr( $DEGREES_OF_FREEDOM, 0.05 );    # significance at .05

# Number of mock experiments to run
my $EXPERIMENTS = 1000;

# How many participants to do before starting checking
my $CHECK_CUTOFF = 1000;

#
# Counts
#
# Experiments that crossed the cutoff at ANY intermediate check ...
my $totals90 = 0;
my $totals95 = 0;

# ... versus experiments that crossed it at the single, final check.
my $only_final90 = 0;
my $only_final95 = 0;

print <<EOT;
Running false positive checks with these metrics;
Sample size: $SAMPLE_SIZE
Variants: $VARIANTS
Conversion rate: $CONVERSION_RATE
Experiments: $EXPERIMENTS
EOT
#
# This is where the experiments are run.
#

# Compute the G statistic for a table holding one [hits, fails] row per
# variant.
#   $table - reference to an array of two-element array references
# Returns whatever Statistics::Gtest's getG() reports for that table.
sub g_statistic {
    my ($table) = @_;
    return Statistics::Gtest->new($table)->getG();
}

for my $experiment ( 1 .. $EXPERIMENTS ) {

    # One [hits, fails] row per variant.
    my @variant = map { [ 0, 0 ] } 1 .. $VARIANTS;

    # Number of intermediate checks in this experiment that exceeded each cutoff.
    my $finding90 = 0;
    my $finding95 = 0;

    #
    # One pretend experiment with $PARTICIPANTS and the same $CONVERSION_RATE in
    # each variant, randomly assigning each participant to a variant at time seen
    #
    # Participants are numbered from 1 so that exactly $PARTICIPANTS are
    # simulated and checking starts once $CHECK_CUTOFF of them have been seen
    # (a 0-based inclusive range would simulate one participant too many).
    #
    for my $participant ( 1 .. $PARTICIPANTS ) {
        my $var = int rand($VARIANTS);

        # Column 0 counts hits, column 1 counts fails.
        my $outcome = rand(100) < $CONVERSION_RATE ? 0 : 1;
        $variant[$var][$outcome]++;

        next if $participant < $CHECK_CUTOFF;

        # next unless int rand(100) == 5;

        # "Peek" at the running result after every participant.
        my $g_value = g_statistic( \@variant );
        $finding90++ if $g_value >= $GTEST_CUTOFF_90;
        $finding95++ if $g_value >= $GTEST_CUTOFF_95;
    }

    # The single check performed once all participants are in.
    my $final_g = g_statistic( \@variant );
    $only_final90++ if $final_g >= $GTEST_CUTOFF_90;
    $only_final95++ if $final_g >= $GTEST_CUTOFF_95;

    # An experiment counts as a false positive under repeated checking if any
    # intermediate check crossed the cutoff.
    $totals90++ if $finding90;
    $totals95++ if $finding95;

    # All values are passed as arguments rather than interpolated into the
    # format string, so nothing in them can be mistaken for a directive.
    my $format =
          "Experiment %4s of %s: (%6s at p<.05) (%6s at p<0.10). "
        . "Hitrate: %s of %s at p<0.10, %s of %s at p<0.05. "
        . "(One check: %s of %s at p<0.1, %s of %s at p<0.05) \n";
    printf $format,
        $experiment,   $EXPERIMENTS, $finding95,    $finding90,
        $totals90,     $experiment,  $totals95,     $experiment,
        $only_final90, $experiment,  $only_final95, $experiment;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment