-
-
Save fperez/90dc99e3f289b3296dc4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:b72676dbc41821fbd81e425bb755bd535a4fe0523b66e89833e498b65051798d" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Original code sent by Philip\n", | |
"\n", | |
"Left here for reference/comparison" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy as np\n", | |
"def stratifiedPermutationTest(group, condition, response, iterations):\n", | |
" '''\n", | |
" Stratified permutation test using the sum of the differences in means between two conditions in\n", | |
" each group (stratum) as the test statistic.\n", | |
" The test statistic is\n", | |
" \\sum_{g in groups} [\n", | |
" mean(response for cases in group g assigned to first condition) -\n", | |
" mean(response for cases in group g assigned to second condition)\n", | |
" ].\n", | |
" There should be at least one group and no more than two conditions.\n", | |
" Under the null hypothesis, all assignments to the two conditions that preserve the number of\n", | |
" cases assigned to the conditions are equally likely.\n", | |
" Groups in which all cases are assigned to the same condition are skipped; they do not contribute \n", | |
" to the p-value since all randomizations give the same contribution to the difference in means.\n", | |
" \n", | |
" Dependencies: numpy (as np)\n", | |
" ''' \n", | |
" groups = np.unique(group)\n", | |
" conditions = np.unique(condition) # to do: test that number of conditions <= 2\n", | |
" def testStatistic(group, condition, response, groups, conditions):\n", | |
" tst = 0.0\n", | |
" for g in groups:\n", | |
" x = (group == g) & (condition == conditions[0])\n", | |
" y = (group == g) & (condition == conditions[1])\n", | |
" if (any(x) & any(y)):\n", | |
" tst = tst + \\\n", | |
" np.mean(response[(group == g) & (condition == conditions[0])]) - \\\n", | |
" np.mean(response[(group == g) & (condition == conditions[1])])\n", | |
" return tst\n", | |
"# \n", | |
" def permuteWithinGroups(group, condition, groups):\n", | |
" permuted = condition\n", | |
" for g in groups:\n", | |
" permuted[group == g] = np.random.permutation(condition[group == g]) \n", | |
" return permuted\n", | |
"#\n", | |
"# (alternate approaches: assign randoms to all, then sort, or use pandas.groupby and transform(np.random.permutation))\n", | |
" tst = testStatistic(group, condition, response, groups, conditions)\n", | |
" dist = np.zeros(iterations)\n", | |
" for i in np.arange(iterations):\n", | |
" dist[i] = testStatistic( group, \n", | |
" permuteWithinGroups(group, condition, groups), \\\n", | |
" response, groups, conditions\\\n", | |
" )\n", | |
" pLeft = float(np.sum(dist <= tst))/float(iterations)\n", | |
" pRight = float(np.sum(dist >= tst))/float(iterations)\n", | |
" pBoth = float(np.sum(abs(dist) >= abs(tst)))/float(iterations)\n", | |
" return pLeft, pRight, pBoth, tst, dist" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 24 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"%%time\n", | |
"group = np.array([0, 0, 0, 1, 1, 1])\n", | |
"condition = np.array([0, 0, 1, 0, 1, 1])\n", | |
"response = np.array([0, 0, 1, 0, 1, 1])\n", | |
"# for these data, left-sided and two-sided p-values should be 1/9 = .111\n", | |
"iterations = 10**4\n", | |
"leftP, rightP, bothP, teststat, dist = stratifiedPermutationTest(group, condition, response, iterations)\n", | |
"print \"left, right, both p:\", leftP, rightP, bothP, \"test statistic:\", teststat" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"left, right, both p: 0.1099 1.0 0.1099 test statistic: -2.0\n", | |
"CPU times: user 1.4 s, sys: 2.1 ms, total: 1.41 s\n", | |
"Wall time: 1.41 s\n" | |
] | |
} | |
], | |
"prompt_number": 25 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Updated code" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy as np\n", | |
"\n", | |
"# Move all the nested functions out, this makes it easier to understand\n", | |
"# what each does, and less likely to accidentally capture state\n", | |
"\n", | |
"def testStatistic(group, condition, response, groups, conditions):\n", | |
" tst = 0.0\n", | |
" # Simplified by creating a few auxiliary variables. The python 'compiler'\n", | |
" # is about as dumb as it gets, so we need to help it by lifting constants\n", | |
" # manually. In this case, in addition to being a performance optimization\n", | |
" # I also think it makes the code clearer, it's easier to see what's being\n", | |
" # computed only once and reused.\n", | |
" \n", | |
" c0 = condition == conditions[0]\n", | |
" c1 = condition == conditions[1]\n", | |
" for g in groups:\n", | |
" gg = group == g\n", | |
" x = gg & c0\n", | |
" y = gg & c1\n", | |
" if (any(x) & any(y)):\n", | |
" tst += response[x].mean() - response[y].mean()\n", | |
" return tst\n", | |
"\n", | |
"\n", | |
"def permuteWithinGroups(group, condition, groups):\n", | |
" permuted = condition\n", | |
" for g in groups:\n", | |
" gg = group == g\n", | |
" permuted[gg] = np.random.permutation(condition[gg]) \n", | |
" return permuted\n", | |
"\n", | |
"\n", | |
"def stratifiedPermutationTest(group, condition, response, iterations):\n", | |
" '''\n", | |
" Stratified permutation test using the sum of the differences in means between two conditions in\n", | |
" each group (stratum) as the test statistic.\n", | |
" The test statistic is\n", | |
" \\sum_{g in groups} [\n", | |
" mean(response for cases in group g assigned to first condition) -\n", | |
" mean(response for cases in group g assigned to second condition)\n", | |
" ].\n", | |
" There should be at least one group and no more than two conditions.\n", | |
" Under the null hypothesis, all assignments to the two conditions that preserve the number of\n", | |
" cases assigned to the conditions are equally likely.\n", | |
" Groups in which all cases are assigned to the same condition are skipped; they do not contribute \n", | |
" to the p-value since all randomizations give the same contribution to the difference in means.\n", | |
" \n", | |
" Dependencies: numpy (as np)\n", | |
" ''' \n", | |
" groups = np.unique(group)\n", | |
" conditions = np.unique(condition) # to do: test that number of conditions <= 2\n", | |
"\n", | |
" # (alternate approaches: assign randoms to all, then sort, or use pandas.groupby and transform(np.random.permutation))\n", | |
" tst = testStatistic(group, condition, response, groups, conditions)\n", | |
" dist = np.zeros(iterations)\n", | |
" for i in range(iterations):\n", | |
" dist[i] = testStatistic( group, \n", | |
" permuteWithinGroups(group, condition, groups),\n", | |
" response, groups, conditions\n", | |
" )\n", | |
" \n", | |
" # define the conditions in one place, and then simply map count_nonzero over the lot\n", | |
" conds = [dist <= tst, dist >= tst, abs(dist) >= abs(tst)]\n", | |
" pLeft, pRight, pBoth = np.array(map(np.count_nonzero, conds))/float(iterations)\n", | |
" return pLeft, pRight, pBoth, tst, dist" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 31 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"%%time\n", | |
"group = np.array([0, 0, 0, 1, 1, 1])\n", | |
"condition = np.array([0, 0, 1, 0, 1, 1])\n", | |
"response = np.array([0, 0, 1, 0, 1, 1])\n", | |
"# for these data, left-sided and two-sided p-values should be 1/9 = .111\n", | |
"iterations = 10**4\n", | |
"leftP, rightP, bothP, teststat, dist = stratifiedPermutationTest(group, condition, response, iterations)\n", | |
"print \"left, right, both p:\", leftP, rightP, bothP, \"test statistic:\", teststat" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"left, right, both p: 0.1133 1.0 0.1133 test statistic: -2.0\n", | |
"CPU times: user 983 ms, sys: 4 ms, total: 987 ms\n", | |
"Wall time: 987 ms\n" | |
] | |
} | |
], | |
"prompt_number": 32 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment