josef-pkt · December 9, 2013 02:44
diff --git a/try_negbin_cluster.py b/try_negbin_cluster.py
 # -*- coding: utf-8 -*-
 """

 Created on Sun Dec 08 19:14:58 2013

 Author: Josef Perktold, based on example by John Beieler

 Not verified!

 """
 import numpy as np
 import pandas as pd
 import statsmodels.api as sm
 from patsy import dmatrices
 from statsmodels.stats.sandwich_covariance import cov_cluster
 import statsmodels.stats.sandwich_covariance as sc

 # Load the data. Rows with missing values are dropped here, before the
 # design matrices are built; `dat` is reused further down for the cluster ids.
 dat_raw = pd.read_csv('negbin_full_data.csv')
 dat = dat_raw.dropna()
 #dat = dat_raw
 y, X = dmatrices('terrorCounts~rivalry+jointDem1+logcapratio+contiguity',
                  data=dat, return_type='dataframe')

 # Poisson fit on the same design; `res_poi` is not referenced again in the
 # visible part of this script.
 res_poi = sm.Poisson(y, X, missing='drop').fit()

 # use estimated params to run faster during experimentation
 # (six values: the five columns of X plus the extra `alpha` parameter that
 # shows up in the recorded results at the end of the file)
 start_params = np.array([ -5.35096895,   2.34653792,   1.32354659,
                          -0.15224597,    1.01714021,  64.78548562])

 mod = sm.NegativeBinomial(y, X, missing='drop')
 res0 = mod.fit(start_params=start_params, maxiter=60, method='bfgs')
 #start_params = np.array(res0.params)
 # see if newton works
 res = mod.fit(start_params=start_params, maxiter=60, method='newton')

 # check eigenvalues of both covariance estimates; negative values mean the
 # matrix is not positive definite.
 # The print calls use the parenthesised single-argument form, which gives
 # the same output on Python 2 and is valid syntax on Python 3.
 print(np.linalg.eigvals(res0.cov_params()))
 print(np.linalg.eigvals(res.cov_params()))

 print("\nbse non-robust 'newton'")
 print(np.asarray(res.bse))

 # Table that collects the different standard error variants side by side.
 bses = pd.DataFrame(res.bse, columns=['non-robust'])

 print("\nbse cluster robust 'newton'")

 # this is not working yet
 #clustered = cov_cluster(res, dat['dyadid'])

 # NegativeBinomial has jac as scoreobs based on approx_fprime_cs
 # NOTE(review): later statsmodels releases appear to name this method
 # `score_obs` -- confirm against the installed version.
 jac = res.model.scoreobs(np.array(res.params))


 # fake a results instance that has the required attributes

 class Holder(object):
     """Plain attribute container.

     Every keyword argument given to the constructor is stored as an
     instance attribute of the same name.
     """

     def __init__(self, **attributes):
         # vars(self) is the instance __dict__, so all keyword arguments
         # become attributes in a single step.
         vars(self).update(attributes)

 # Stand-in objects for cov_cluster: `exog` is the per-observation score
 # matrix and `resid` is all ones, so multiplying exog by resid leaves the
 # scores unchanged.
 model_fake = Holder(exog=jac)
 res_fake = Holder(model=model_fake, resid=np.ones(jac.shape[0]),
                   normalized_cov_params=res.normalized_cov_params)


 gr = np.asarray(dat['dyadid'], int)   # need for bincount, why not int64?
 # Shift the labels so they start at zero. This is done out of place because
 # np.asarray may hand back a view of the DataFrame column when the dtype
 # already matches, and an in-place subtraction would then modify `dat`.
 gr = gr - gr.min()
 clustered = cov_cluster(res_fake, gr)
 bse_clu = np.sqrt(np.diag(clustered))
 print(bse_clu)


 # calculate directly
 S = sc.S_crosssection(jac, gr)
 # The next part is _HCCM, which shouldn't require results
 xxi = res.normalized_cov_params
 H = np.dot(np.dot(xxi, S), xxi.T)
 bse_clu2 = np.sqrt(np.diag(H))
 print(bse_clu2)

 #add correction
 # Take the dimensions from `jac`, not from X: cov_cluster is given `jac` as
 # exog, and jac has one more column than X (the alpha parameter). Using
 # X.shape here made this correction differ slightly from the one applied
 # inside cov_cluster. The recorded output at the end of this file was
 # presumably produced with X.shape, which would explain the small gap
 # between its first and third rows.
 nobs, k_vars = jac.shape
 n_groups = len(np.unique(gr))
 corr_fact = n_groups / (n_groups - 1.) * ((nobs-1.) / float(nobs - k_vars))

 bse_clu3 = np.sqrt(np.diag(H * corr_fact))
 print(bse_clu3)
 # should be same as using fake



 bses['robust_corr'] = bse_clu
 bses['robust_no_corr'] = bse_clu2
 bses['robust_corr_'] = bse_clu3

 print('\n')
 print(bses)

 ''' Results

 Note: "bfgs" ends up with singular Hessian, "newton" doesn't

 [  4.84419405e+23  -2.99759693e+53  -6.64613998e+35  -1.12701301e+36
   9.00058591e+34  -1.86552394e+34]
 [  1.91432527e+01   2.34646505e-02   1.52971738e-02   1.87594821e-03
   1.12834253e-03   9.22005732e-03]

 bse non-robust 'newton'
 [ 0.08170064  0.13790239  0.11111216  0.03587395  0.10845392  4.37528876]

 bse cluster robust 'newton'
 [ 0.18103571  0.46702637  0.27775385  0.06913378  0.25524573  9.47252558]
 [ 0.18099112  0.46691133  0.27768544  0.06911675  0.25518286  9.4701923 ]
 [ 0.18103433  0.4670228   0.27775173  0.06913326  0.25524379  9.4724533 ]


             non-robust  robust_corr  robust_no_corr  robust_corr_
 Intercept      0.081701     0.181036        0.180991      0.181034
 rivalry        0.137902     0.467026        0.466911      0.467023
 jointDem1      0.111112     0.277754        0.277685      0.277752
 logcapratio    0.035874     0.069134        0.069117      0.069133
 contiguity     0.108454     0.255246        0.255183      0.255244
 alpha          4.375289     9.472526        9.470192      9.472453
 
 '''
	# -*- coding: utf-8 -*-
	"""

	Created on Sun Dec 08 19:14:58 2013

	Author: Josef Perktold, based on example by John Beieler

	Not verified!

	"""
	import numpy as np
	import pandas as pd
	import statsmodels.api as sm
	from patsy import dmatrices
	from statsmodels.stats.sandwich_covariance import cov_cluster
	import statsmodels.stats.sandwich_covariance as sc

	# Load the data. Rows with missing values are dropped here, before the
	# design matrices are built; `dat` is reused further down for the cluster ids.
	dat_raw = pd.read_csv('negbin_full_data.csv')
	dat = dat_raw.dropna()
	#dat = dat_raw
	y, X = dmatrices('terrorCounts~rivalry+jointDem1+logcapratio+contiguity',
	                 data=dat, return_type='dataframe')

	# Poisson fit on the same design; `res_poi` is not referenced again in the
	# visible part of this script.
	res_poi = sm.Poisson(y, X, missing='drop').fit()

	# use estimated params to run faster during experimentation
	# (six values: the five columns of X plus the extra `alpha` parameter that
	# shows up in the recorded results at the end of the file)
	start_params = np.array([-5.35096895, 2.34653792, 1.32354659,
	                         -0.15224597, 1.01714021, 64.78548562])

	mod = sm.NegativeBinomial(y, X, missing='drop')
	res0 = mod.fit(start_params=start_params, maxiter=60, method='bfgs')
	#start_params = np.array(res0.params)
	# see if newton works
	res = mod.fit(start_params=start_params, maxiter=60, method='newton')

	# check eigenvalues of both covariance estimates; negative values mean the
	# matrix is not positive definite.
	# The print calls use the parenthesised single-argument form, which gives
	# the same output on Python 2 and is valid syntax on Python 3.
	print(np.linalg.eigvals(res0.cov_params()))
	print(np.linalg.eigvals(res.cov_params()))

	print("\nbse non-robust 'newton'")
	print(np.asarray(res.bse))

	# Table that collects the different standard error variants side by side.
	bses = pd.DataFrame(res.bse, columns=['non-robust'])

	print("\nbse cluster robust 'newton'")

	# this is not working yet
	#clustered = cov_cluster(res, dat['dyadid'])

	# NegativeBinomial has jac as scoreobs based on approx_fprime_cs
	# NOTE(review): later statsmodels releases appear to name this method
	# `score_obs` -- confirm against the installed version.
	jac = res.model.scoreobs(np.array(res.params))


	# fake a results instance that has the required attributes

	class Holder(object):
	    """Minimal attribute bag used to fake a results instance.

	    Each keyword argument passed to the constructor becomes an instance
	    attribute with the same name.
	    """

	    def __init__(self, **kwds):
	        # Copy all keyword arguments straight into the instance namespace.
	        self.__dict__.update(kwds)

	# Stand-in objects for cov_cluster: `exog` is the per-observation score
	# matrix and `resid` is all ones, so multiplying exog by resid leaves the
	# scores unchanged.
	model_fake = Holder(exog=jac)
	res_fake = Holder(model=model_fake, resid=np.ones(jac.shape[0]),
	                  normalized_cov_params=res.normalized_cov_params)


	gr = np.asarray(dat['dyadid'], int) # need for bincount, why not int64?
	# Shift the labels so they start at zero. This is done out of place because
	# np.asarray may hand back a view of the DataFrame column when the dtype
	# already matches, and an in-place subtraction would then modify `dat`.
	gr = gr - gr.min()
	clustered = cov_cluster(res_fake, gr)
	bse_clu = np.sqrt(np.diag(clustered))
	print(bse_clu)


	# calculate directly
	S = sc.S_crosssection(jac, gr)
	# The next part is _HCCM, which shouldn't require results
	xxi = res.normalized_cov_params
	H = np.dot(np.dot(xxi, S), xxi.T)
	bse_clu2 = np.sqrt(np.diag(H))
	print(bse_clu2)

	#add correction
	# Take the dimensions from `jac`, not from X: cov_cluster is given `jac` as
	# exog, and jac has one more column than X (the alpha parameter). Using
	# X.shape here made this correction differ slightly from the one applied
	# inside cov_cluster. The recorded output at the end of this file was
	# presumably produced with X.shape, which would explain the small gap
	# between its first and third rows.
	nobs, k_vars = jac.shape
	n_groups = len(np.unique(gr))
	corr_fact = n_groups / (n_groups - 1.) * ((nobs-1.) / float(nobs - k_vars))

	bse_clu3 = np.sqrt(np.diag(H * corr_fact))
	print(bse_clu3)
	# should be same as using fake



	bses['robust_corr'] = bse_clu
	bses['robust_no_corr'] = bse_clu2
	bses['robust_corr_'] = bse_clu3

	print('\n')
	print(bses)

	''' Results

	Note: "bfgs" ends up with singular Hessian, "newton" doesn't

	[ 4.84419405e+23 -2.99759693e+53 -6.64613998e+35 -1.12701301e+36
	9.00058591e+34 -1.86552394e+34]
	[ 1.91432527e+01 2.34646505e-02 1.52971738e-02 1.87594821e-03
	1.12834253e-03 9.22005732e-03]

	bse non-robust 'newton'
	[ 0.08170064 0.13790239 0.11111216 0.03587395 0.10845392 4.37528876]

	bse cluster robust 'newton'
	[ 0.18103571 0.46702637 0.27775385 0.06913378 0.25524573 9.47252558]
	[ 0.18099112 0.46691133 0.27768544 0.06911675 0.25518286 9.4701923 ]
	[ 0.18103433 0.4670228 0.27775173 0.06913326 0.25524379 9.4724533 ]


	non-robust robust_corr robust_no_corr robust_corr_
	Intercept 0.081701 0.181036 0.180991 0.181034
	rivalry 0.137902 0.467026 0.466911 0.467023
	jointDem1 0.111112 0.277754 0.277685 0.277752
	logcapratio 0.035874 0.069134 0.069117 0.069133
	contiguity 0.108454 0.255246 0.255183 0.255244
	alpha 4.375289 9.472526 9.470192 9.472453

	'''