Created January 11, 2018 13:09
-
-
Save Chronial/2692b29181528f01eb2269f6d8b49317 to your computer and use it in GitHub Desktop.
Letter Table Code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "indata = [(\"E\", 21912),\n", | |
| "(\"T\", 16587),\n", | |
| "(\"A\", 14810),\n", | |
| "(\"O\", 14003),\n", | |
| "(\"I\", 13318),\n", | |
| "(\"N\", 12666),\n", | |
| "(\"S\", 11450),\n", | |
| "(\"R\", 10977),\n", | |
| "(\"H\", 10795),\n", | |
| "(\"D\", 7874),\n", | |
| "(\"L\", 7253),\n", | |
| "(\"U\", 5246),\n", | |
| "(\"C\", 4943),\n", | |
| "(\"M\", 4761),\n", | |
| "(\"F\", 4200),\n", | |
| "(\"Y\", 3853),\n", | |
| "(\"W\", 3819),\n", | |
| "(\"G\", 3693),\n", | |
| "(\"P\", 3316),\n", | |
| "(\"B\", 2715),\n", | |
| "(\"V\", 2019),\n", | |
| "(\"K\", 1257),\n", | |
| "(\"X\", 315),\n", | |
| "(\"Q\", 205),\n", | |
| "(\"J\", 188),\n", | |
| "(\"Z\", 128)]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 55, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'ETAOINSRHDLUCMFYWGPBVKXQJZ'" | |
| ] | |
| }, | |
| "execution_count": 55, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "\"\".join(x[0] for x in indata)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": { | |
| "collapsed": false, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'A': 0.08123837786542185,\n", | |
| " 'B': 0.014892788379785303,\n", | |
| " 'C': 0.027114199985738028,\n", | |
| " 'D': 0.043191828988003486,\n", | |
| " 'E': 0.12019549870270922,\n", | |
| " 'F': 0.02303856765933638,\n", | |
| " 'G': 0.020257483420459344,\n", | |
| " 'H': 0.05921460425774672,\n", | |
| " 'I': 0.07305420097310522,\n", | |
| " 'J': 0.0010312501714179142,\n", | |
| " 'K': 0.006895114178044245,\n", | |
| " 'L': 0.03978541219837304,\n", | |
| " 'M': 0.02611586205383345,\n", | |
| " 'N': 0.06947773761265585,\n", | |
| " 'O': 0.07681168165087793,\n", | |
| " 'P': 0.018189497704371293,\n", | |
| " 'Q': 0.0011245015167057042,\n", | |
| " 'R': 0.06021294218965129,\n", | |
| " 'S': 0.06280752373795274,\n", | |
| " 'T': 0.09098588613462202,\n", | |
| " 'U': 0.02877626808116158,\n", | |
| " 'V': 0.011074968596238131,\n", | |
| " 'W': 0.020948640450239437,\n", | |
| " 'X': 0.0017278925744502285,\n", | |
| " 'Y': 0.021135143140815018,\n", | |
| " 'Z': 0.0007021277762845373}" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "total = sum(x[1] for x in indata)\n", | |
| "freq = {k: v/total for k,v in indata}\n", | |
| "freq" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "sort = list(sorted(freq.keys(), key=lambda x: -freq[x]))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 63, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def get_expectation(buckets):\n", | |
| " expected = 0\n", | |
| " for bi, bucket in enumerate(buckets):\n", | |
| " for li, l in enumerate(bucket):\n", | |
| " expected += freq[l] * (bi + 1 + li + 1)\n", | |
| " return expected" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 83, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5.920983198301728" | |
| ] | |
| }, | |
| "execution_count": 83, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "buckets = [\n", | |
| " 'ABCDEFG',\n", | |
| " 'HIJKLMN',\n", | |
| " 'OPQRST',\n", | |
| " 'UVWXYZ'\n", | |
| "]\n", | |
| "get_expectation(buckets)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 84, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "4.844884615173639" | |
| ] | |
| }, | |
| "execution_count": 84, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "buckets = [\n", | |
| " 'EACDGBF',\n", | |
| " 'INLMHKJ',\n", | |
| " 'ROTSPQ',\n", | |
| " 'UYWVXZ',\n", | |
| "]\n", | |
| "get_expectation(buckets)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 85, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "4.788813129789416" | |
| ] | |
| }, | |
| "execution_count": 85, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "buckets = [\n", | |
| " 'EOLPBKJ',\n", | |
| " 'ATCMFVQ',\n", | |
| " 'RNUHYX',\n", | |
| " 'ISDGWZ'\n", | |
| "]\n", | |
| "get_expectation(buckets)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 86, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "4.467891367668112" | |
| ] | |
| }, | |
| "execution_count": 86, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "buckets = [\n", | |
| " 'ETOSLYK',\n", | |
| " 'AIRUWX',\n", | |
| " 'NHCGQ',\n", | |
| " 'DMPJ',\n", | |
| " 'FBZ',\n", | |
| " 'V',\n", | |
| "]\n", | |
| "'ETA OIN SRHD LUCMF YWGPBV KXQJZ'\n", | |
| "get_expectation(buckets)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 87, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "4.479668464040635" | |
| ] | |
| }, | |
| "execution_count": 87, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "buckets = [\n", | |
| " 'ETOSLYK',\n", | |
| " 'AIRUWX',\n", | |
| " 'NHCGQ',\n", | |
| " 'DMPJ',\n", | |
| " 'FBVZ',\n", | |
| "]\n", | |
| "get_expectation(buckets)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.