Skip to content

Instantly share code, notes, and snippets.

@fperez
Forked from pbstark/testPerm.ipynb
Last active August 29, 2015 14:02
Show Gist options
  • Save fperez/90dc99e3f289b3296dc4 to your computer and use it in GitHub Desktop.
Save fperez/90dc99e3f289b3296dc4 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:b72676dbc41821fbd81e425bb755bd535a4fe0523b66e89833e498b65051798d"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Original code sent by Philip\n",
"\n",
"Left here for reference/comparison"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"def stratifiedPermutationTest(group, condition, response, iterations):\n",
" '''\n",
" Stratified permutation test using the sum of the differences in means between two conditions in\n",
" each group (stratum) as the test statistic.\n",
" The test statistic is\n",
" \\sum_{g in groups} [\n",
" mean(response for cases in group g assigned to first condition) -\n",
" mean(response for cases in group g assigned to second condition)\n",
" ].\n",
" There should be at least one group and no more than two conditions.\n",
" Under the null hypothesis, all assignments to the two conditions that preserve the number of\n",
" cases assigned to the conditions are equally likely.\n",
" Groups in which all cases are assigned to the same condition are skipped; they do not contribute \n",
" to the p-value since all randomizations give the same contribution to the difference in means.\n",
" \n",
" Dependencies: numpy (as np)\n",
" ''' \n",
" groups = np.unique(group)\n",
" conditions = np.unique(condition) # to do: test that number of conditions <= 2\n",
" def testStatistic(group, condition, response, groups, conditions):\n",
" tst = 0.0\n",
" for g in groups:\n",
" x = (group == g) & (condition == conditions[0])\n",
" y = (group == g) & (condition == conditions[1])\n",
" if (any(x) & any(y)):\n",
" tst = tst + \\\n",
" np.mean(response[(group == g) & (condition == conditions[0])]) - \\\n",
" np.mean(response[(group == g) & (condition == conditions[1])])\n",
" return tst\n",
"# \n",
" def permuteWithinGroups(group, condition, groups):\n",
" permuted = condition\n",
" for g in groups:\n",
" permuted[group == g] = np.random.permutation(condition[group == g]) \n",
" return permuted\n",
"#\n",
"# (alternate approaches: assign randoms to all, then sort, or use pandas.groupby and transform(np.random.permutation))\n",
" tst = testStatistic(group, condition, response, groups, conditions)\n",
" dist = np.zeros(iterations)\n",
" for i in np.arange(iterations):\n",
" dist[i] = testStatistic( group, \n",
" permuteWithinGroups(group, condition, groups), \\\n",
" response, groups, conditions\\\n",
" )\n",
" pLeft = float(np.sum(dist <= tst))/float(iterations)\n",
" pRight = float(np.sum(dist >= tst))/float(iterations)\n",
" pBoth = float(np.sum(abs(dist) >= abs(tst)))/float(iterations)\n",
" return pLeft, pRight, pBoth, tst, dist"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%time\n",
"group = np.array([0, 0, 0, 1, 1, 1])\n",
"condition = np.array([0, 0, 1, 0, 1, 1])\n",
"response = np.array([0, 0, 1, 0, 1, 1])\n",
"# for these data, left-sided and two-sided p-values should be 1/9 = .111\n",
"iterations = 10**4\n",
"leftP, rightP, bothP, teststat, dist = stratifiedPermutationTest(group, condition, response, iterations)\n",
"print \"left, right, both p:\", leftP, rightP, bothP, \"test statistic:\", teststat"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"left, right, both p: 0.1099 1.0 0.1099 test statistic: -2.0\n",
"CPU times: user 1.4 s, sys: 2.1 ms, total: 1.41 s\n",
"Wall time: 1.41 s\n"
]
}
],
"prompt_number": 25
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Updated code"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"\n",
"# Move all the nested functions out, this makes it easier to understand\n",
"# what each does, and less likely to accidentally capture state\n",
"\n",
"def testStatistic(group, condition, response, groups, conditions):\n",
" tst = 0.0\n",
" # Simplified by creating a few auxiliary variables. The python 'compiler'\n",
" # is about as dumb as it gets, so we need to help it by lifting constants\n",
" # manually. In this case, in addition to being a performance optimization\n",
" # I also think it makes the code clearer, it's easier to see what's being\n",
" # computed only once and reused.\n",
" \n",
" c0 = condition == conditions[0]\n",
" c1 = condition == conditions[1]\n",
" for g in groups:\n",
" gg = group == g\n",
" x = gg & c0\n",
" y = gg & c1\n",
" if (any(x) & any(y)):\n",
" tst += response[x].mean() - response[y].mean()\n",
" return tst\n",
"\n",
"\n",
"def permuteWithinGroups(group, condition, groups):\n",
" permuted = condition\n",
" for g in groups:\n",
" gg = group == g\n",
" permuted[gg] = np.random.permutation(condition[gg]) \n",
" return permuted\n",
"\n",
"\n",
"def stratifiedPermutationTest(group, condition, response, iterations):\n",
" '''\n",
" Stratified permutation test using the sum of the differences in means between two conditions in\n",
" each group (stratum) as the test statistic.\n",
" The test statistic is\n",
" \\sum_{g in groups} [\n",
" mean(response for cases in group g assigned to first condition) -\n",
" mean(response for cases in group g assigned to second condition)\n",
" ].\n",
" There should be at least one group and no more than two conditions.\n",
" Under the null hypothesis, all assignments to the two conditions that preserve the number of\n",
" cases assigned to the conditions are equally likely.\n",
" Groups in which all cases are assigned to the same condition are skipped; they do not contribute \n",
" to the p-value since all randomizations give the same contribution to the difference in means.\n",
" \n",
" Dependencies: numpy (as np)\n",
" ''' \n",
" groups = np.unique(group)\n",
" conditions = np.unique(condition) # to do: test that number of conditions <= 2\n",
"\n",
" # (alternate approaches: assign randoms to all, then sort, or use pandas.groupby and transform(np.random.permutation))\n",
" tst = testStatistic(group, condition, response, groups, conditions)\n",
" dist = np.zeros(iterations)\n",
" for i in range(iterations):\n",
" dist[i] = testStatistic( group, \n",
" permuteWithinGroups(group, condition, groups),\n",
" response, groups, conditions\n",
" )\n",
" \n",
" # define the conditions in one place, and then simply map count_nonzero over the lot\n",
" conds = [dist <= tst, dist >= tst, abs(dist) >= abs(tst)]\n",
" pLeft, pRight, pBoth = np.array(map(np.count_nonzero, conds))/float(iterations)\n",
" return pLeft, pRight, pBoth, tst, dist"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 31
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%time\n",
"group = np.array([0, 0, 0, 1, 1, 1])\n",
"condition = np.array([0, 0, 1, 0, 1, 1])\n",
"response = np.array([0, 0, 1, 0, 1, 1])\n",
"# for these data, left-sided and two-sided p-values should be 1/9 = .111\n",
"iterations = 10**4\n",
"leftP, rightP, bothP, teststat, dist = stratifiedPermutationTest(group, condition, response, iterations)\n",
"print \"left, right, both p:\", leftP, rightP, bothP, \"test statistic:\", teststat"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"left, right, both p: 0.1133 1.0 0.1133 test statistic: -2.0\n",
"CPU times: user 983 ms, sys: 4 ms, total: 987 ms\n",
"Wall time: 987 ms\n"
]
}
],
"prompt_number": 32
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment