Last active
December 29, 2015 12:58
-
-
Save astraw/7673689 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "Zar birds two-way ANOVA" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "Attempt to product Example 12.2 \"Computations for the Model I two-factor ANOVA\" from the book\n\nZar, J.H. (1999) Biostatistical Analysis, 4th ed.\n\n" | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "import pandas\nimport numpy as np\n\nimport statsmodels\nfrom statsmodels.formula.api import ols\nfrom statsmodels.stats.anova import anova_lm\n\nno_hormone = np.array([[16.5, 14.5],\n [18.4, 11.0],\n [12.7, 10.8],\n [14.0, 14.3],\n [12.8, 10.0]])\nhormone = np.array([[39.1, 32.0],\n [26.2, 23.8],\n [21.3, 28.8],\n [35.8, 25.0],\n [40.2, 29.3]])\n\n# data munging...\npivot = []\nfor hormone_value,hormone_data in enumerate([no_hormone,hormone]):\n for i,sex in enumerate(['female','male']):\n col = hormone_data[:,i]\n for j, el in enumerate(col):\n pivot.append( (hormone_value, sex, el, j) )\n\ndata = {}\nfor n,r in zip(['hormone_treatment','sex','calcium','bird_num'],\n zip(*pivot)):\n \n data[n]=r\ndf = pandas.DataFrame(data)", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "lm = ols('calcium ~ C(hormone_treatment)*C(sex)', df).fit()", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "lm.summary()", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": "<table class=\"simpletable\">\n<caption>OLS Regression Results</caption>\n<tr>\n <th>Dep. Variable:</th> <td>calcium</td> <th> R-squared: </th> <td> 0.800</td>\n</tr>\n<tr>\n <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.762</td>\n</tr>\n<tr>\n <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 21.27</td>\n</tr>\n<tr>\n <th>Date:</th> <td>Wed, 27 Nov 2013</td> <th> Prob (F-statistic):</th> <td>7.89e-06</td>\n</tr>\n<tr>\n <th>Time:</th> <td>16:17:49</td> <th> Log-Likelihood: </th> <td> -57.458</td>\n</tr>\n<tr>\n <th>No. Observations:</th> <td> 20</td> <th> AIC: </th> <td> 122.9</td>\n</tr>\n<tr>\n <th>Df Residuals:</th> <td> 16</td> <th> BIC: </th> <td> 126.9</td>\n</tr>\n<tr>\n <th>Df Model:</th> <td> 3</td> <th> </th> <td> </td> \n</tr>\n</table>\n<table class=\"simpletable\">\n<tr>\n <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[95.0% Conf. Int.]</th> \n</tr>\n<tr>\n <th>Intercept</th> <td> 14.8800</td> <td> 2.140</td> <td> 6.953</td> <td> 0.000</td> <td> 10.343 19.417</td>\n</tr>\n<tr>\n <th>C(hormone_treatment)[T.1]</th> <td> 17.6400</td> <td> 3.026</td> <td> 5.829</td> <td> 0.000</td> <td> 11.224 24.056</td>\n</tr>\n<tr>\n <th>C(sex)[T.male]</th> <td> -2.7600</td> <td> 3.026</td> <td> -0.912</td> <td> 0.375</td> <td> -9.176 3.656</td>\n</tr>\n<tr>\n <th>C(hormone_treatment)[T.1]:C(sex)[T.male]</th> <td> -1.9800</td> <td> 4.280</td> <td> -0.463</td> <td> 0.650</td> <td> -11.053 7.093</td>\n</tr>\n</table>\n<table class=\"simpletable\">\n<tr>\n <th>Omnibus:</th> <td> 2.821</td> <th> Durbin-Watson: </th> <td> 2.006</td>\n</tr>\n<tr>\n <th>Prob(Omnibus):</th> <td> 0.244</td> <th> Jarque-Bera (JB): </th> <td> 1.251</td>\n</tr>\n<tr>\n <th>Skew:</th> <td>-0.547</td> <th> Prob(JB): </th> <td> 0.535</td>\n</tr>\n<tr>\n <th>Kurtosis:</th> <td> 3.550</td> <th> Cond. No. </th> <td> 6.85</td>\n</tr>\n</table>", | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": "<class 'statsmodels.iolib.summary.Summary'>\n\"\"\"\n OLS Regression Results \n==============================================================================\nDep. Variable: calcium R-squared: 0.800\nModel: OLS Adj. R-squared: 0.762\nMethod: Least Squares F-statistic: 21.27\nDate: Wed, 27 Nov 2013 Prob (F-statistic): 7.89e-06\nTime: 16:17:49 Log-Likelihood: -57.458\nNo. Observations: 20 AIC: 122.9\nDf Residuals: 16 BIC: 126.9\nDf Model: 3 \n============================================================================================================\n coef std err t P>|t| [95.0% Conf. Int.]\n------------------------------------------------------------------------------------------------------------\nIntercept 14.8800 2.140 6.953 0.000 10.343 19.417\nC(hormone_treatment)[T.1] 17.6400 3.026 5.829 0.000 11.224 24.056\nC(sex)[T.male] -2.7600 3.026 -0.912 0.375 -9.176 3.656\nC(hormone_treatment)[T.1]:C(sex)[T.male] -1.9800 4.280 -0.463 0.650 -11.053 7.093\n==============================================================================\nOmnibus: 2.821 Durbin-Watson: 2.006\nProb(Omnibus): 0.244 Jarque-Bera (JB): 1.251\nSkew: -0.547 Prob(JB): 0.535\nKurtosis: 3.550 Cond. No. 6.85\n==============================================================================\n\"\"\"" | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "anova_lm(lm)", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>df</th>\n <th>sum_sq</th>\n <th>mean_sq</th>\n <th>F</th>\n <th>PR(>F)</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>C(hormone_treatment)</th>\n <td> 1</td>\n <td> 1386.1125</td>\n <td> 1386.11250</td>\n <td> 60.533556</td>\n <td> 0.000001</td>\n </tr>\n <tr>\n <th>C(sex)</th>\n <td> 1</td>\n <td> 70.3125</td>\n <td> 70.31250</td>\n <td> 3.070650</td>\n <td> 0.098856</td>\n </tr>\n <tr>\n <th>C(hormone_treatment):C(sex)</th>\n <td> 1</td>\n <td> 4.9005</td>\n <td> 4.90050</td>\n <td> 0.214012</td>\n <td> 0.649870</td>\n </tr>\n <tr>\n <th>Residual</th>\n <td> 16</td>\n <td> 366.3720</td>\n <td> 22.89825</td>\n <td> NaN</td>\n <td> NaN</td>\n </tr>\n </tbody>\n</table>\n</div>", | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 4, | |
"text": " df sum_sq mean_sq F PR(>F)\nC(hormone_treatment) 1 1386.1125 1386.11250 60.533556 0.000001\nC(sex) 1 70.3125 70.31250 3.070650 0.098856\nC(hormone_treatment):C(sex) 1 4.9005 4.90050 0.214012 0.649870\nResidual 16 366.3720 22.89825 NaN NaN" | |
} | |
], | |
"prompt_number": 4 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment