Skip to content

Instantly share code, notes, and snippets.

@Chronial
Created January 11, 2018 13:09
Show Gist options
  • Select an option

  • Save Chronial/2692b29181528f01eb2269f6d8b49317 to your computer and use it in GitHub Desktop.

Select an option

Save Chronial/2692b29181528f01eb2269f6d8b49317 to your computer and use it in GitHub Desktop.
Letter Table Code
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"indata = [(\"E\", 21912),\n",
"(\"T\", 16587),\n",
"(\"A\", 14810),\n",
"(\"O\", 14003),\n",
"(\"I\", 13318),\n",
"(\"N\", 12666),\n",
"(\"S\", 11450),\n",
"(\"R\", 10977),\n",
"(\"H\", 10795),\n",
"(\"D\", 7874),\n",
"(\"L\", 7253),\n",
"(\"U\", 5246),\n",
"(\"C\", 4943),\n",
"(\"M\", 4761),\n",
"(\"F\", 4200),\n",
"(\"Y\", 3853),\n",
"(\"W\", 3819),\n",
"(\"G\", 3693),\n",
"(\"P\", 3316),\n",
"(\"B\", 2715),\n",
"(\"V\", 2019),\n",
"(\"K\", 1257),\n",
"(\"X\", 315),\n",
"(\"Q\", 205),\n",
"(\"J\", 188),\n",
"(\"Z\", 128)]"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'ETAOINSRHDLUCMFYWGPBVKXQJZ'"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\".join(x[0] for x in indata)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"{'A': 0.08123837786542185,\n",
" 'B': 0.014892788379785303,\n",
" 'C': 0.027114199985738028,\n",
" 'D': 0.043191828988003486,\n",
" 'E': 0.12019549870270922,\n",
" 'F': 0.02303856765933638,\n",
" 'G': 0.020257483420459344,\n",
" 'H': 0.05921460425774672,\n",
" 'I': 0.07305420097310522,\n",
" 'J': 0.0010312501714179142,\n",
" 'K': 0.006895114178044245,\n",
" 'L': 0.03978541219837304,\n",
" 'M': 0.02611586205383345,\n",
" 'N': 0.06947773761265585,\n",
" 'O': 0.07681168165087793,\n",
" 'P': 0.018189497704371293,\n",
" 'Q': 0.0011245015167057042,\n",
" 'R': 0.06021294218965129,\n",
" 'S': 0.06280752373795274,\n",
" 'T': 0.09098588613462202,\n",
" 'U': 0.02877626808116158,\n",
" 'V': 0.011074968596238131,\n",
" 'W': 0.020948640450239437,\n",
" 'X': 0.0017278925744502285,\n",
" 'Y': 0.021135143140815018,\n",
" 'Z': 0.0007021277762845373}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"total = sum(x[1] for x in indata)\n",
"freq = {k: v/total for k,v in indata}\n",
"freq"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sort = list(sorted(freq.keys(), key=lambda x: -freq[x]))"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_expectation(buckets):\n",
"    expected = 0\n",
"    for bi, bucket in enumerate(buckets):\n",
"        for li, l in enumerate(bucket):\n",
"            expected += freq[l] * (bi + 1 + li + 1)\n",
"    return expected"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"5.920983198301728"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"buckets = [\n",
" 'ABCDEFG',\n",
" 'HIJKLMN',\n",
" 'OPQRST',\n",
" 'UVWXYZ'\n",
"]\n",
"get_expectation(buckets)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"4.844884615173639"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"buckets = [\n",
" 'EACDGBF',\n",
" 'INLMHKJ',\n",
" 'ROTSPQ',\n",
" 'UYWVXZ',\n",
"]\n",
"get_expectation(buckets)"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"4.788813129789416"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"buckets = [\n",
" 'EOLPBKJ',\n",
" 'ATCMFVQ',\n",
" 'RNUHYX',\n",
" 'ISDGWZ'\n",
"]\n",
"get_expectation(buckets)"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"4.467891367668112"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"buckets = [\n",
" 'ETOSLYK',\n",
" 'AIRUWX',\n",
" 'NHCGQ',\n",
" 'DMPJ',\n",
" 'FBZ',\n",
" 'V',\n",
"]\n",
"'ETA OIN SRHD LUCMF YWGPBV KXQJZ'\n",
"get_expectation(buckets)"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"4.479668464040635"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"buckets = [\n",
" 'ETOSLYK',\n",
" 'AIRUWX',\n",
" 'NHCGQ',\n",
" 'DMPJ',\n",
" 'FBVZ',\n",
"]\n",
"get_expectation(buckets)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment