outman · April 3, 2019 12:37
diff --git a/ci.py b/ci.py
 import numpy as np
 import scipy.stats as st
 import random

 # Simulated A/B experiment: groups A and B each contain `sample_size` people,
 # and every list entry is one sign-up count drawn with random.randint
 # (both bounds inclusive).
 sample_size = 20000
 apply_a = [random.randint(0, 10) for _ in range(sample_size)]
 apply_b = [random.randint(2, 7) for _ in range(sample_size)]
 n_a = len(apply_a)
 n_b = len(apply_b)

 # Group means.
 mean_a = np.mean(apply_a)
 mean_b = np.mean(apply_b)

 print('mean_a={},mean_b={}'.format(mean_a, mean_b))

 # Group standard deviations. ddof=1 yields the sample (n - 1) estimate that
 # the standard-error and pooled-deviation formulas below are written for;
 # np.std defaults to ddof=0 (population estimate).
 std_a = np.std(apply_a, ddof=1)
 std_b = np.std(apply_b, ddof=1)

 print('std_a={},std_b={}'.format(std_a, std_b))

 # (Inspection of the data distribution is omitted.)

 # Direction of the test: the alternative hypothesis is that versions A and B
 # differ, i.e. mean of A != mean of B, so a two-tailed test is used.

 # Compute t and the p-value under the null hypothesis. With its default
 # arguments st.ttest_ind reports a two-sided p-value.
 t, p_value = st.ttest_ind(apply_a, apply_b)
 print('t={},p-value={}'.format(t, p_value))

 # Decision rule at significance level alpha = 0.05. Because the p-value is
 # already two-sided it is compared with alpha itself; comparing it with
 # alpha / 2 would silently halve the significance level.
 alpha = 0.05
 is_significant = bool(p_value < alpha)
 if is_significant:
     print('拒绝零假设，有统计显著，A版本和B版本有差异')
 else:
     print('接受零假设，没有统计显著，A版本和B版本没有差异')

 # Degrees of freedom for two independent samples.
 df = n_a + n_b - 2

 # Critical t value (t_ci) for a two-sided (1 - alpha) confidence level,
 # taken from the t distribution with df degrees of freedom instead of a
 # hand-copied table value (about 1.960 for these sample sizes).
 t_ci = st.t.ppf(1 - alpha / 2, df)

 # Standard error of the difference between the two group means.
 se = np.sqrt(np.square(std_a) / n_a + np.square(std_b) / n_b)

 # For two independent samples the confidence interval is centred on
 # (mean of A - mean of B).
 mean = mean_a - mean_b
 x = mean - t_ci * se
 y = mean + t_ci * se

 print('置信区间 ({}, {})'.format(x, y))

 # Effect size d: difference of the means divided by the pooled standard
 # deviation sp.
 pooled_var = ((n_a - 1) * np.square(std_a) + (n_b - 1) * np.square(std_b)) / (n_a + n_b - 2)
 sp = np.sqrt(pooled_var)
 print('效应量 d = {}'.format((mean_a - mean_b) / sp))
	import numpy as np
	import scipy.stats as st
	import random

	# Simulated A/B experiment: groups A and B each contain `sample_size` people,
	# and every list entry is one sign-up count drawn with random.randint
	# (both bounds inclusive).
	sample_size = 20000
	apply_a = [random.randint(0, 10) for _ in range(sample_size)]
	apply_b = [random.randint(2, 7) for _ in range(sample_size)]
	n_a = len(apply_a)
	n_b = len(apply_b)

	# Group means.
	mean_a = np.mean(apply_a)
	mean_b = np.mean(apply_b)

	print('mean_a={},mean_b={}'.format(mean_a, mean_b))

	# Group standard deviations. ddof=1 yields the sample (n - 1) estimate that
	# the standard-error and pooled-deviation formulas below are written for;
	# np.std defaults to ddof=0 (population estimate).
	std_a = np.std(apply_a, ddof=1)
	std_b = np.std(apply_b, ddof=1)

	print('std_a={},std_b={}'.format(std_a, std_b))

	# (Inspection of the data distribution is omitted.)

	# Direction of the test: the alternative hypothesis is that versions A and B
	# differ, i.e. mean of A != mean of B, so a two-tailed test is used.

	# Compute t and the p-value under the null hypothesis. With its default
	# arguments st.ttest_ind reports a two-sided p-value.
	t, p_value = st.ttest_ind(apply_a, apply_b)
	print('t={},p-value={}'.format(t, p_value))

	# Decision rule at significance level alpha = 0.05. Because the p-value is
	# already two-sided it is compared with alpha itself; comparing it with
	# alpha / 2 would silently halve the significance level.
	alpha = 0.05
	is_significant = bool(p_value < alpha)
	if is_significant:
		print('拒绝零假设，有统计显著，A版本和B版本有差异')
	else:
		print('接受零假设，没有统计显著，A版本和B版本没有差异')

	# Degrees of freedom for two independent samples.
	df = n_a + n_b - 2

	# Critical t value (t_ci) for a two-sided (1 - alpha) confidence level,
	# taken from the t distribution with df degrees of freedom instead of a
	# hand-copied table value (about 1.960 for these sample sizes).
	t_ci = st.t.ppf(1 - alpha / 2, df)

	# Standard error of the difference between the two group means.
	se = np.sqrt(np.square(std_a) / n_a + np.square(std_b) / n_b)

	# For two independent samples the confidence interval is centred on
	# (mean of A - mean of B).
	mean = mean_a - mean_b
	x = mean - t_ci * se
	y = mean + t_ci * se

	print('置信区间 ({}, {})'.format(x, y))

	# Effect size d: difference of the means divided by the pooled standard
	# deviation sp.
	pooled_var = ((n_a - 1) * np.square(std_a) + (n_b - 1) * np.square(std_b)) / (n_a + n_b - 2)
	sp = np.sqrt(pooled_var)
	print('效应量 d = {}'.format((mean_a - mean_b) / sp))