fperez · August 29, 2015 14:02
diff --git a/testPerm.ipynb b/testPerm.ipynb
 {
 "metadata": {
  "name": "",
  "signature": "sha256:b72676dbc41821fbd81e425bb755bd535a4fe0523b66e89833e498b65051798d"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Original code sent by Philip\n",
      "\n",
      "Left here for reference/comparison"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "def stratifiedPermutationTest(group, condition, response, iterations):\n",
      "    '''\n",
      "    Stratified permutation test using the sum of the differences in means between two conditions in\n",
      "    each group (stratum) as the test statistic.\n",
      "    The test statistic is\n",
      "        \\sum_{g in groups} [\n",
      "                            mean(response for cases in group g assigned to first condition) -\n",
      "                            mean(response for cases in group g assigned to second condition)\n",
      "                           ].\n",
      "    There should be at least one group and no more than two conditions.\n",
      "    Under the null hypothesis, all assignments to the two conditions that preserve the number of\n",
      "    cases assigned to the conditions are equally likely.\n",
      "    Groups in which all cases are assigned to the same condition are skipped; they do not contribute \n",
      "    to the p-value since all randomizations give the same contribution to the difference in means.\n",
      "    \n",
      "    Dependencies: numpy (as np)\n",
      "    '''   \n",
      "    groups = np.unique(group)\n",
      "    conditions = np.unique(condition) # to do: test that number of conditions <= 2\n",
      "    def testStatistic(group, condition, response, groups, conditions):\n",
      "        tst = 0.0\n",
      "        for g in groups:\n",
      "             x = (group == g) & (condition == conditions[0])\n",
      "             y = (group == g) & (condition == conditions[1])\n",
      "             if (any(x) & any(y)):\n",
      "                 tst = tst + \\\n",
      "                      np.mean(response[(group == g) & (condition == conditions[0])]) - \\\n",
      "                      np.mean(response[(group == g) & (condition == conditions[1])])\n",
      "        return tst\n",
      "#   \n",
      "    def permuteWithinGroups(group, condition, groups):\n",
      "        permuted = condition\n",
      "        for g in groups:\n",
      "            permuted[group == g] = np.random.permutation(condition[group == g])      \n",
      "        return permuted\n",
      "#\n",
      "#  (alternate approaches: assign randoms to all, then sort, or use pandas.groupby and transform(np.random.permutation))\n",
      "    tst = testStatistic(group, condition, response, groups, conditions)\n",
      "    dist = np.zeros(iterations)\n",
      "    for i in np.arange(iterations):\n",
      "         dist[i] = testStatistic( group, \n",
      "                                  permuteWithinGroups(group, condition, groups), \\\n",
      "                                  response, groups, conditions\\\n",
      "                                )\n",
      "    pLeft = float(np.sum(dist <= tst))/float(iterations)\n",
      "    pRight = float(np.sum(dist >= tst))/float(iterations)\n",
      "    pBoth = float(np.sum(abs(dist) >= abs(tst)))/float(iterations)\n",
      "    return pLeft, pRight, pBoth, tst, dist"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 24
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%time\n",
      "group = np.array([0, 0, 0, 1, 1, 1])\n",
      "condition = np.array([0, 0, 1, 0, 1, 1])\n",
      "response = np.array([0, 0, 1, 0, 1, 1])\n",
      "# for these data, left-sided and two-sided p-values should be 1/9 = .111\n",
      "iterations = 10**4\n",
      "leftP, rightP, bothP, teststat, dist = stratifiedPermutationTest(group, condition, response, iterations)\n",
      "print \"left, right, both p:\", leftP, rightP, bothP, \"test statistic:\", teststat"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "left, right, both p: 0.1099 1.0 0.1099 test statistic: -2.0\n",
        "CPU times: user 1.4 s, sys: 2.1 ms, total: 1.41 s\n",
        "Wall time: 1.41 s\n"
       ]
      }
     ],
     "prompt_number": 25
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Updated code"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "\n",
      "# Move all the nested functions out, this makes it easier to understand\n",
      "# what each does, and less likely to accidentally capture state\n",
      "\n",
      "def testStatistic(group, condition, response, groups, conditions):\n",
      "    tst = 0.0\n",
      "    # Simplified by creating a few auxiliary variables. The python 'compiler'\n",
      "    # is about as dumb as it gets, so we need to help it by lifting constants\n",
      "    # manually. In this case, in addition to being a performance optimization\n",
      "    # I also think it makes the code clearer, it's easier to see what's being\n",
      "    # computed only once and reused.\n",
      "    \n",
      "    c0 = condition == conditions[0]\n",
      "    c1 = condition == conditions[1]\n",
      "    for g in groups:\n",
      "        gg = group == g\n",
      "        x = gg & c0\n",
      "        y = gg & c1\n",
      "        if (any(x) & any(y)):\n",
      "            tst += response[x].mean() - response[y].mean()\n",
      "    return tst\n",
      "\n",
      "\n",
      "def permuteWithinGroups(group, condition, groups):\n",
      "    permuted = condition\n",
      "    for g in groups:\n",
      "        gg = group == g\n",
      "        permuted[gg] = np.random.permutation(condition[gg])      \n",
      "    return permuted\n",
      "\n",
      "\n",
      "def stratifiedPermutationTest(group, condition, response, iterations):\n",
      "    '''\n",
      "    Stratified permutation test using the sum of the differences in means between two conditions in\n",
      "    each group (stratum) as the test statistic.\n",
      "    The test statistic is\n",
      "        \\sum_{g in groups} [\n",
      "                            mean(response for cases in group g assigned to first condition) -\n",
      "                            mean(response for cases in group g assigned to second condition)\n",
      "                           ].\n",
      "    There should be at least one group and no more than two conditions.\n",
      "    Under the null hypothesis, all assignments to the two conditions that preserve the number of\n",
      "    cases assigned to the conditions are equally likely.\n",
      "    Groups in which all cases are assigned to the same condition are skipped; they do not contribute \n",
      "    to the p-value since all randomizations give the same contribution to the difference in means.\n",
      "    \n",
      "    Dependencies: numpy (as np)\n",
      "    '''   \n",
      "    groups = np.unique(group)\n",
      "    conditions = np.unique(condition) # to do: test that number of conditions <= 2\n",
      "\n",
      "    #  (alternate approaches: assign randoms to all, then sort, or use pandas.groupby and transform(np.random.permutation))\n",
      "    tst = testStatistic(group, condition, response, groups, conditions)\n",
      "    dist = np.zeros(iterations)\n",
      "    for i in range(iterations):\n",
      "         dist[i] = testStatistic( group, \n",
      "                                  permuteWithinGroups(group, condition, groups),\n",
      "                                  response, groups, conditions\n",
      "                                )\n",
      "            \n",
      "    # define the conditions in one place, and then simply map count_nonzero over the lot\n",
      "    conds = [dist <= tst, dist >= tst, abs(dist) >= abs(tst)]\n",
      "    pLeft, pRight, pBoth = np.array(map(np.count_nonzero, conds))/float(iterations)\n",
      "    return pLeft, pRight, pBoth, tst, dist"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 31
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%time\n",
      "group = np.array([0, 0, 0, 1, 1, 1])\n",
      "condition = np.array([0, 0, 1, 0, 1, 1])\n",
      "response = np.array([0, 0, 1, 0, 1, 1])\n",
      "# for these data, left-sided and two-sided p-values should be 1/9 = .111\n",
      "iterations = 10**4\n",
      "leftP, rightP, bothP, teststat, dist = stratifiedPermutationTest(group, condition, response, iterations)\n",
      "print \"left, right, both p:\", leftP, rightP, bothP, \"test statistic:\", teststat"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "left, right, both p: 0.1133 1.0 0.1133 test statistic: -2.0\n",
        "CPU times: user 983 ms, sys: 4 ms, total: 987 ms\n",
        "Wall time: 987 ms\n"
       ]
      }
     ],
     "prompt_number": 32
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
 }
	{
	"metadata": {
	"name": "",
	"signature": "sha256:b72676dbc41821fbd81e425bb755bd535a4fe0523b66e89833e498b65051798d"
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Original code sent by Philip\n",
	"\n",
	"Left here for reference/comparison"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"import numpy as np\n",
	"def stratifiedPermutationTest(group, condition, response, iterations):\n",
	" '''\n",
	" Stratified permutation test using the sum of the differences in means between two conditions in\n",
	" each group (stratum) as the test statistic.\n",
	" The test statistic is\n",
	" \\sum_{g in groups} [\n",
	" mean(response for cases in group g assigned to first condition) -\n",
	" mean(response for cases in group g assigned to second condition)\n",
	" ].\n",
	" There should be at least one group and no more than two conditions.\n",
	" Under the null hypothesis, all assignments to the two conditions that preserve the number of\n",
	" cases assigned to the conditions are equally likely.\n",
	" Groups in which all cases are assigned to the same condition are skipped; they do not contribute \n",
	" to the p-value since all randomizations give the same contribution to the difference in means.\n",
	" \n",
	" Dependencies: numpy (as np)\n",
	" ''' \n",
	" groups = np.unique(group)\n",
	" conditions = np.unique(condition) # to do: test that number of conditions <= 2\n",
	" def testStatistic(group, condition, response, groups, conditions):\n",
	" tst = 0.0\n",
	" for g in groups:\n",
	" x = (group == g) & (condition == conditions[0])\n",
	" y = (group == g) & (condition == conditions[1])\n",
	" if (any(x) & any(y)):\n",
	" tst = tst + \\\n",
	" np.mean(response[(group == g) & (condition == conditions[0])]) - \\\n",
	" np.mean(response[(group == g) & (condition == conditions[1])])\n",
	" return tst\n",
	"# \n",
	" def permuteWithinGroups(group, condition, groups):\n",
	" permuted = condition\n",
	" for g in groups:\n",
	" permuted[group == g] = np.random.permutation(condition[group == g]) \n",
	" return permuted\n",
	"#\n",
	"# (alternate approaches: assign randoms to all, then sort, or use pandas.groupby and transform(np.random.permutation))\n",
	" tst = testStatistic(group, condition, response, groups, conditions)\n",
	" dist = np.zeros(iterations)\n",
	" for i in np.arange(iterations):\n",
	" dist[i] = testStatistic( group, \n",
	" permuteWithinGroups(group, condition, groups), \\\n",
	" response, groups, conditions\\\n",
	" )\n",
	" pLeft = float(np.sum(dist <= tst))/float(iterations)\n",
	" pRight = float(np.sum(dist >= tst))/float(iterations)\n",
	" pBoth = float(np.sum(abs(dist) >= abs(tst)))/float(iterations)\n",
	" return pLeft, pRight, pBoth, tst, dist"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 24
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"%%time\n",
	"group = np.array([0, 0, 0, 1, 1, 1])\n",
	"condition = np.array([0, 0, 1, 0, 1, 1])\n",
	"response = np.array([0, 0, 1, 0, 1, 1])\n",
	"# for these data, left-sided and two-sided p-values should be 1/9 = .111\n",
	"iterations = 10**4\n",
	"leftP, rightP, bothP, teststat, dist = stratifiedPermutationTest(group, condition, response, iterations)\n",
	"print \"left, right, both p:\", leftP, rightP, bothP, \"test statistic:\", teststat"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"left, right, both p: 0.1099 1.0 0.1099 test statistic: -2.0\n",
	"CPU times: user 1.4 s, sys: 2.1 ms, total: 1.41 s\n",
	"Wall time: 1.41 s\n"
	]
	}
	],
	"prompt_number": 25
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Updated code"
	]
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"import numpy as np\n",
	"\n",
	"# Move all the nested functions out, this makes it easier to understand\n",
	"# what each does, and less likely to accidentally capture state\n",
	"\n",
	"def testStatistic(group, condition, response, groups, conditions):\n",
	" tst = 0.0\n",
	" # Simplified by creating a few auxiliary variables. The python 'compiler'\n",
	" # is about as dumb as it gets, so we need to help it by lifting constants\n",
	" # manually. In this case, in addition to being a performance optimization\n",
	" # I also think it makes the code clearer, it's easier to see what's being\n",
	" # computed only once and reused.\n",
	" \n",
	" c0 = condition == conditions[0]\n",
	" c1 = condition == conditions[1]\n",
	" for g in groups:\n",
	" gg = group == g\n",
	" x = gg & c0\n",
	" y = gg & c1\n",
	" if (any(x) & any(y)):\n",
	" tst += response[x].mean() - response[y].mean()\n",
	" return tst\n",
	"\n",
	"\n",
	"def permuteWithinGroups(group, condition, groups):\n",
	" permuted = condition\n",
	" for g in groups:\n",
	" gg = group == g\n",
	" permuted[gg] = np.random.permutation(condition[gg]) \n",
	" return permuted\n",
	"\n",
	"\n",
	"def stratifiedPermutationTest(group, condition, response, iterations):\n",
	" '''\n",
	" Stratified permutation test using the sum of the differences in means between two conditions in\n",
	" each group (stratum) as the test statistic.\n",
	" The test statistic is\n",
	" \\sum_{g in groups} [\n",
	" mean(response for cases in group g assigned to first condition) -\n",
	" mean(response for cases in group g assigned to second condition)\n",
	" ].\n",
	" There should be at least one group and no more than two conditions.\n",
	" Under the null hypothesis, all assignments to the two conditions that preserve the number of\n",
	" cases assigned to the conditions are equally likely.\n",
	" Groups in which all cases are assigned to the same condition are skipped; they do not contribute \n",
	" to the p-value since all randomizations give the same contribution to the difference in means.\n",
	" \n",
	" Dependencies: numpy (as np)\n",
	" ''' \n",
	" groups = np.unique(group)\n",
	" conditions = np.unique(condition) # to do: test that number of conditions <= 2\n",
	"\n",
	" # (alternate approaches: assign randoms to all, then sort, or use pandas.groupby and transform(np.random.permutation))\n",
	" tst = testStatistic(group, condition, response, groups, conditions)\n",
	" dist = np.zeros(iterations)\n",
	" for i in range(iterations):\n",
	" dist[i] = testStatistic( group, \n",
	" permuteWithinGroups(group, condition, groups),\n",
	" response, groups, conditions\n",
	" )\n",
	" \n",
	" # define the conditions in one place, and then simply map count_nonzero over the lot\n",
	" conds = [dist <= tst, dist >= tst, abs(dist) >= abs(tst)]\n",
	" pLeft, pRight, pBoth = np.array(map(np.count_nonzero, conds))/float(iterations)\n",
	" return pLeft, pRight, pBoth, tst, dist"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 31
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"%%time\n",
	"group = np.array([0, 0, 0, 1, 1, 1])\n",
	"condition = np.array([0, 0, 1, 0, 1, 1])\n",
	"response = np.array([0, 0, 1, 0, 1, 1])\n",
	"# for these data, left-sided and two-sided p-values should be 1/9 = .111\n",
	"iterations = 10**4\n",
	"leftP, rightP, bothP, teststat, dist = stratifiedPermutationTest(group, condition, response, iterations)\n",
	"print \"left, right, both p:\", leftP, rightP, bothP, \"test statistic:\", teststat"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"output_type": "stream",
	"stream": "stdout",
	"text": [
	"left, right, both p: 0.1133 1.0 0.1133 test statistic: -2.0\n",
	"CPU times: user 983 ms, sys: 4 ms, total: 987 ms\n",
	"Wall time: 987 ms\n"
	]
	}
	],
	"prompt_number": 32
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [],
	"language": "python",
	"metadata": {},
	"outputs": []
	}
	],
	"metadata": {}
	}
	]
	}