Created
November 16, 2024 23:32
-
-
Save Wavesonics/76f8b99761f28ab393daa582d97ca7c1 to your computer and use it in GitHub Desktop.
Normalize the frequency dictionary and show the top 100 values
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells" : [ { | |
"metadata" : { }, | |
"cell_type" : "markdown", | |
"source" : "# Normalize the Frequency dictionary and show the top entries", | |
"id" : "b5662893a4276c56" | |
}, { | |
"metadata" : { }, | |
"cell_type" : "markdown", | |
"source" : "## Load the dictionary", | |
"id" : "8c56834998a91477" | |
}, { | |
"cell_type" : "code", | |
"metadata" : { | |
"collapsed" : true, | |
"ExecuteTime" : { | |
"end_time" : "2024-11-16T23:30:26.594864500Z", | |
"start_time" : "2024-11-16T23:30:26.520882400Z" | |
} | |
}, | |
"source" : [ "import kotlin.math.*\n", "import java.io.File\n", "\n", "// Install the Kotlin kernel in your Jupyter notebook to use this Kotlin code.\n", "\n", "// Load the frequency dictionary from the file \"en-80k.txt\".\n", "val frequencyDict = File(\"en-80k.txt\").readLines()\n", " .map { it.split(\" \") }\n", " .associate { it[0] to it[1].toDouble() }" ], | |
"id" : "63a8a28bb28b5fe2", | |
"outputs" : [ ], | |
"execution_count" : 6 | |
}, { | |
"metadata" : { }, | |
"cell_type" : "markdown", | |
"source" : "## Normalize the Frequencies", | |
"id" : "e7372ec83db5a653" | |
}, { | |
"metadata" : { | |
"ExecuteTime" : { | |
"end_time" : "2024-11-16T23:30:26.662819800Z", | |
"start_time" : "2024-11-16T23:30:26.599012400Z" | |
} | |
}, | |
"cell_type" : "code", | |
"source" : [ "// The transformation we want to perform could involve normalizing the frequencies, sorting, or any other meaningful operation.\n", "// For this example, let's create a transformation function that normalizes the values in our frequency dictionary by dividing each one by the maximum frequency, so the largest value becomes 1.\n", "fun normalizeFrequency(dictionary: Map<String, Double>): Map<String, Double> {\n", " val maxFrequency = dictionary.values.maxOrNull()?.toDouble() ?: 1.0 // Falls back to 1.0 only when the dictionary is empty; a maximum of 0.0 is not guarded against.\n", " return dictionary.mapValues { (_, value) -> value / maxFrequency }\n", "}\n", "\n", "// Transform the frequency dictionary using the normalization function.\n", "val normalizedDict = normalizeFrequency(frequencyDict)" ], | |
"id" : "176a7504ce868a1c", | |
"outputs" : [ ], | |
"execution_count" : 7 | |
}, { | |
"metadata" : { }, | |
"cell_type" : "markdown", | |
"source" : "## Sort by Normalized Frequency", | |
"id" : "965f16b9bcdbd94" | |
}, { | |
"metadata" : { | |
"ExecuteTime" : { | |
"end_time" : "2024-11-16T23:30:26.710045900Z", | |
"start_time" : "2024-11-16T23:30:26.664831700Z" | |
} | |
}, | |
"cell_type" : "code", | |
"source" : [ "// Let's also create a function to sort this dictionary by frequency, descending order.\n", "fun sortByFrequencyDesc(dictionary: Map<String, Double>): List<Pair<String, Double>> {\n", " return dictionary.toList().sortedByDescending { (_, value) -> value }\n", "}\n", "\n", "// Sort the normalized dictionary.\n", "val sortedNormalizedDict = sortByFrequencyDesc(normalizedDict)" ], | |
"id" : "f02a375bf778c7a0", | |
"outputs" : [ ], | |
"execution_count" : 8 | |
}, { | |
"metadata" : { }, | |
"cell_type" : "markdown", | |
"source" : "## Print the top 100 most common entries\n", | |
"id" : "c606ce86c7d372a2" | |
}, { | |
"metadata" : { | |
"ExecuteTime" : { | |
"end_time" : "2024-11-16T23:30:26.764812900Z", | |
"start_time" : "2024-11-16T23:30:26.713586800Z" | |
} | |
}, | |
"cell_type" : "code", | |
"source" : [ "println(\"\\nSorted Normalized Frequency Dictionary:\")\n", "sortedNormalizedDict.asSequence().take(100).forEach { (key, value) -> println(\"$key: $value\") }" ], | |
"id" : "a644bbc4fae3634e", | |
"outputs" : [ { | |
"name" : "stdout", | |
"output_type" : "stream", | |
"text" : [ "\n", "Sorted Normalized Frequency Dictionary:\r\n", "the: 1.0\r\n", "of: 0.5831938165627943\r\n", "and: 0.4262363002006846\r\n", "to: 0.36437510373748044\r\n", "in: 0.3181140082165972\r\n", "a: 0.288337179541287\r\n", "is: 0.15790227612044533\r\n", "that: 0.15068123344091458\r\n", "for: 0.12326941236874517\r\n", "it: 0.1081047891668036\r\n", "as: 0.107361759307572\r\n", "was: 0.1036344345970732\r\n", "with: 0.0976093748376779\r\n", "be: 0.0907551832230548\r\n", "by: 0.0885745789823279\r\n", "on: 0.0865298768339933\r\n", "not: 0.08517817878673417\r\n", "he: 0.07741348604049378\r\n", "i: 0.0731604677017636\r\n", "this: 0.07205748164651331\r\n", "are: 0.06969150566777767\r\n", "or: 0.06907517756061778\r\n", "his: 0.06801431842391989\r\n", "from: 0.06533675542174222\r\n", "at: 0.06428659670541728\r\n", "which: 0.05914098440538214\r\n", "but: 0.05258930132595755\r\n", "have: 0.05230849466451879\r\n", "an: 0.05134422855448481\r\n", "had: 0.0492684488154792\r\n", "they: 0.04637008235395681\r\n", "you: 0.04402749402632537\r\n", "were: 0.04276089871269687\r\n", "their: 0.04054786686575204\r\n", "one: 0.04047249802257234\r\n", "all: 0.03884877562812043\r\n", "we: 0.038745717586015346\r\n", "can: 0.03136533521681986\r\n", "her: 0.030760158175550705\r\n", "has: 0.030738741778419114\r\n", "there: 0.030579718640487214\r\n", "been: 0.030492046052201172\r\n", "if: 0.02947383363580643\r\n", "more: 0.02913993510155136\r\n", "when: 0.028621786508732077\r\n", "will: 0.027970210343521484\r\n", "would: 0.027737995352408777\r\n", "who: 0.027583364124935734\r\n", "so: 0.027292271716854024\r\n", "no: 0.026378339592347637\r\n", "she: 0.026229155962555734\r\n", "other: 0.02605004214795734\r\n", "its: 0.025791098310487093\r\n", "may: 0.024823247338712433\r\n", "these: 0.024592376524793656\r\n", "what: 0.023024438161892057\r\n", "them: 0.022593183584736543\r\n", "than: 0.022317913489945242\r\n", "some: 0.0222670178925261\r\n", "him: 0.022237868126014772\r\n", "time: 
0.021993444988117696\r\n", "into: 0.021549624730973625\r\n", "only: 0.02138024141681475\r\n", "do: 0.02102932980892539\r\n", "such: 0.020718046342172426\r\n", "my: 0.020382891620353113\r\n", "new: 0.020213098529147426\r\n", "about: 0.020168238056054912\r\n", "out: 0.019741221482839893\r\n", "also: 0.019560663184376402\r\n", "two: 0.01947524028300076\r\n", "any: 0.018525354639067636\r\n", "up: 0.018370148352657763\r\n", "first: 0.017386087400953678\r\n", "could: 0.01670314922311412\r\n", "our: 0.0164785217932234\r\n", "then: 0.016437627821823616\r\n", "most: 0.016214870058563365\r\n", "see: 0.01594232884762976\r\n", "me: 0.015435231089363623\r\n", "should: 0.01533498916741005\r\n", "after: 0.01510853565136897\r\n", "said: 0.01427563041209887\r\n", "your: 0.014139523826684496\r\n", "very: 0.0140661834533368\r\n", "between: 0.014013253547732668\r\n", "made: 0.013892581759636863\r\n", "many: 0.013798735508557591\r\n", "over: 0.013475618302947953\r\n", "like: 0.013368866956403618\r\n", "those: 0.01302645425780571\r\n", "did: 0.012991518269139402\r\n", "now: 0.012794211882934108\r\n", "even: 0.012759750495866282\r\n", "well: 0.012720347150153674\r\n", "where: 0.012649326373285173\r\n", "must: 0.01248641745359908\r\n", "people: 0.01240586091361361\r\n", "through: 0.0121869298329085\r\n", "how: 0.011866343233155413\r\n" ] | |
} ], | |
"execution_count" : 9 | |
} ], | |
"metadata" : { | |
"kernelspec" : { | |
"display_name" : "Kotlin", | |
"language" : "kotlin", | |
"name" : "kotlin" | |
}, | |
"language_info" : { | |
"name" : "kotlin", | |
"version" : "1.9.23", | |
"mimetype" : "text/x-kotlin", | |
"file_extension" : ".kt", | |
"pygments_lexer" : "kotlin", | |
"codemirror_mode" : "text/x-kotlin", | |
"nbconvert_exporter" : "" | |
} | |
}, | |
"nbformat" : 4, | |
"nbformat_minor" : 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment