Created
April 3, 2019 12:37
-
-
Save outman/7f793a8feeb4a89e3ac2b35ff73ca3a9 to your computer and use it in GitHub Desktop.
双样本置信区间
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random

import numpy as np
import scipy.stats as st

# Two independent samples (A/B test): simulate per-user sign-up counts for two
# groups, run a two-tailed t-test, build a confidence interval for the
# difference of the means and report the effect size (Cohen's d).

# Number of users in each of the A and B groups.
GROUP_SIZE = 20000

# Simulated number of sign-ups per user in each group.
apply_a = [random.randint(0, 10) for _ in range(GROUP_SIZE)]
apply_b = [random.randint(2, 7) for _ in range(GROUP_SIZE)]
n_a = len(apply_a)
n_b = len(apply_b)

# Group means.
mean_a = np.mean(apply_a)
mean_b = np.mean(apply_b)
print('mean_a={},mean_b={}'.format(mean_a, mean_b))

# Group standard deviations. ddof=1 gives the *sample* standard deviation,
# which is what the standard-error and pooled-SD formulas below expect
# (np.std defaults to ddof=0, the population formula).
std_a = np.std(apply_a, ddof=1)
std_b = np.std(apply_b, ddof=1)
print('std_a={},std_b={}'.format(std_a, std_b))

# (Inspection of the data distribution is omitted.)

# Test direction: the alternative hypothesis is that versions A and B differ,
# i.e. mean(A) != mean(B), so a two-tailed test is used.

# Compute t and the p-value under the null hypothesis.
# NOTE: ttest_ind uses the pooled-variance (equal_var=True) test by default,
# which matches the df and pooled SD used further down.
t, p_value = st.ttest_ind(apply_a, apply_b)
print('t={},p-value={}'.format(t, p_value))

# Decision rule at significance level alpha = 0.05.
# ttest_ind already returns the two-sided p-value, so it is compared against
# alpha itself; comparing against alpha / 2 would silently test at 2.5%.
alpha = 0.05
if p_value < alpha:
    print('拒绝零假设,有统计显著,A版本和B版本有差异')
else:
    print('接受零假设,没有统计显著,A版本和B版本没有差异')

# Degrees of freedom of the pooled two-sample t-test.
df = n_a + n_b - 2

# Critical t value for the two-sided (1 - alpha) confidence level, computed
# from df instead of being read off a table (about 1.960 for this df).
t_ci = st.t.ppf(1 - alpha / 2, df)

# Standard error of the difference between the two means.
se = np.sqrt(np.square(std_a) / n_a + np.square(std_b) / n_b)

# For two independent samples the interval is centred on mean(A) - mean(B).
mean = mean_a - mean_b
x = mean - t_ci * se  # lower bound of the confidence interval
y = mean + t_ci * se  # upper bound of the confidence interval
print('置信区间 ({}, {})'.format(x, y))

# Effect size (Cohen's d): mean difference divided by the pooled standard
# deviation of the two groups.
sp = np.sqrt(((n_a - 1) * np.square(std_a) + (n_b - 1) * np.square(std_b)) / df)
print('效应量 d = {}'.format((mean_a - mean_b) / sp))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment