DexGroves · August 17, 2020 10:04
diff --git a/se-control.py b/se-control.py
 import numpy as np
 import pandas as pd
 from sklearn.linear_model import LinearRegression

 N = 10_000
 np.random.seed(42)

 df = pd.DataFrame(index=np.arange(N))

 df['x2'] = np.random.choice([1,2,3], size=N)
 df['x1'] = np.random.normal(size=N)

 df['y'] = np.nan

 df.loc[df.x2 == 1, 'y'] = df.x1 * 0
 df.loc[df.x2 == 2, 'y'] = df.x1 * 0
 df.loc[df.x2 == 3, 'x1'] = df.loc[df.x2 == 3, 'x1'] / 10
 df.loc[df.x2 == 3, 'y'] = df.x1 * 10
 df.y += np.random.normal(scale=0.1, size=N)

 flm = LinearRegression()
 flm.fit(X=df[['x1', 'x2']], y=df.y)

 intercept, coef = flm.intercept_, flm.coef_

 lms = {}
 for x2, grp in df.groupby('x2'):
    lm = LinearRegression()
    lm.fit(X=grp[['x1']], y=grp.y)
    lms[x2] = dict(N=len(grp), model=lm)

 # Partial regression coefficient for X1
 coef[0]

 # Weighted average of individual regression coefficients on X1
 sum([lm['model'].coef_ * lm['N'] for lm in lms.values()]) / N
	import numpy as np
	import pandas as pd
	from sklearn.linear_model import LinearRegression

	N = 10_000
	np.random.seed(42)

	df = pd.DataFrame(index=np.arange(N))

	df['x2'] = np.random.choice([1,2,3], size=N)
	df['x1'] = np.random.normal(size=N)

	df['y'] = np.nan

	df.loc[df.x2 == 1, 'y'] = df.x1 * 0
	df.loc[df.x2 == 2, 'y'] = df.x1 * 0
	df.loc[df.x2 == 3, 'x1'] = df.loc[df.x2 == 3, 'x1'] / 10
	df.loc[df.x2 == 3, 'y'] = df.x1 * 10
	df.y += np.random.normal(scale=0.1, size=N)

	flm = LinearRegression()
	flm.fit(X=df[['x1', 'x2']], y=df.y)

	intercept, coef = flm.intercept_, flm.coef_

	lms = {}
	for x2, grp in df.groupby('x2'):
	lm = LinearRegression()
	lm.fit(X=grp[['x1']], y=grp.y)
	lms[x2] = dict(N=len(grp), model=lm)

	# Partial regression coefficient for X1
	coef[0]

	# Weighted average of individual regression coefficients on X1
	sum([lm['model'].coef_ * lm['N'] for lm in lms.values()]) / N
No results found