Skip to content

Instantly share code, notes, and snippets.

@Wavesonics
Created November 16, 2024 23:32
Show Gist options
  • Save Wavesonics/76f8b99761f28ab393daa582d97ca7c1 to your computer and use it in GitHub Desktop.
Save Wavesonics/76f8b99761f28ab393daa582d97ca7c1 to your computer and use it in GitHub Desktop.
Normalize the frequency dictionary and show the top 100 values
Display the source blob
Display the rendered blob
Raw
{
"cells" : [ {
"metadata" : { },
"cell_type" : "markdown",
"source" : "# Normalize the Frequency dictionary and show some time entries",
"id" : "b5662893a4276c56"
}, {
"metadata" : { },
"cell_type" : "markdown",
"source" : "## Load the dictionary",
"id" : "8c56834998a91477"
}, {
"cell_type" : "code",
"metadata" : {
"collapsed" : true,
"ExecuteTime" : {
"end_time" : "2024-11-16T23:30:26.594864500Z",
"start_time" : "2024-11-16T23:30:26.520882400Z"
}
},
"source" : [ "import kotlin.math.*\n", "import java.io.File\n", "\n", "// Install Kotlin kernel in your Jupyter notebook to use this Kotlin code.\n", "\n", "// Load the frequency dictionary from the file \"en-80.txt\".\n", "val frequencyDict = File(\"en-80k.txt\").readLines()\n", " .map { it.split(\" \") }\n", " .associate { it[0] to it[1].toDouble() }" ],
"id" : "63a8a28bb28b5fe2",
"outputs" : [ ],
"execution_count" : 6
}, {
"metadata" : { },
"cell_type" : "markdown",
"source" : "## Normalize the Frequencies",
"id" : "e7372ec83db5a653"
}, {
"metadata" : {
"ExecuteTime" : {
"end_time" : "2024-11-16T23:30:26.662819800Z",
"start_time" : "2024-11-16T23:30:26.599012400Z"
}
},
"cell_type" : "code",
"source" : [ "// The transformation we want to perform could involve normalizing the frequencies, sorting, or any other meaningful operation.\n", "// For this example, let's create a transformation function that normalizes the values in our frequency dictionary between 0 and 1.\n", "fun normalizeFrequency(dictionary: Map<String, Double>): Map<String, Double> {\n", " val maxFrequency = dictionary.values.maxOrNull()?.toDouble() ?: 1.0 // Avoid division by zero.\n", " return dictionary.mapValues { (_, value) -> value / maxFrequency }\n", "}\n", "\n", "// Transform the frequency dictionary using the normalization function.\n", "val normalizedDict = normalizeFrequency(frequencyDict)" ],
"id" : "176a7504ce868a1c",
"outputs" : [ ],
"execution_count" : 7
}, {
"metadata" : { },
"cell_type" : "markdown",
"source" : "## Sort by Normalized Frequency",
"id" : "965f16b9bcdbd94"
}, {
"metadata" : {
"ExecuteTime" : {
"end_time" : "2024-11-16T23:30:26.710045900Z",
"start_time" : "2024-11-16T23:30:26.664831700Z"
}
},
"cell_type" : "code",
"source" : [ "// Let's also create a function to sort this dictionary by frequency, descending order.\n", "fun sortByFrequencyDesc(dictionary: Map<String, Double>): List<Pair<String, Double>> {\n", " return dictionary.toList().sortedByDescending { (_, value) -> value }\n", "}\n", "\n", "// Sort the normalized dictionary.\n", "val sortedNormalizedDict = sortByFrequencyDesc(normalizedDict)" ],
"id" : "f02a375bf778c7a0",
"outputs" : [ ],
"execution_count" : 8
}, {
"metadata" : { },
"cell_type" : "markdown",
"source" : "## Print the top 100 most comment entries\n",
"id" : "c606ce86c7d372a2"
}, {
"metadata" : {
"ExecuteTime" : {
"end_time" : "2024-11-16T23:30:26.764812900Z",
"start_time" : "2024-11-16T23:30:26.713586800Z"
}
},
"cell_type" : "code",
"source" : [ "println(\"\\nSorted Normalized Frequency Dictionary:\")\n", "sortedNormalizedDict.asSequence().take(100).forEach { (key, value) -> println(\"$key: $value\") }" ],
"id" : "a644bbc4fae3634e",
"outputs" : [ {
"name" : "stdout",
"output_type" : "stream",
"text" : [ "\n", "Sorted Normalized Frequency Dictionary:\r\n", "the: 1.0\r\n", "of: 0.5831938165627943\r\n", "and: 0.4262363002006846\r\n", "to: 0.36437510373748044\r\n", "in: 0.3181140082165972\r\n", "a: 0.288337179541287\r\n", "is: 0.15790227612044533\r\n", "that: 0.15068123344091458\r\n", "for: 0.12326941236874517\r\n", "it: 0.1081047891668036\r\n", "as: 0.107361759307572\r\n", "was: 0.1036344345970732\r\n", "with: 0.0976093748376779\r\n", "be: 0.0907551832230548\r\n", "by: 0.0885745789823279\r\n", "on: 0.0865298768339933\r\n", "not: 0.08517817878673417\r\n", "he: 0.07741348604049378\r\n", "i: 0.0731604677017636\r\n", "this: 0.07205748164651331\r\n", "are: 0.06969150566777767\r\n", "or: 0.06907517756061778\r\n", "his: 0.06801431842391989\r\n", "from: 0.06533675542174222\r\n", "at: 0.06428659670541728\r\n", "which: 0.05914098440538214\r\n", "but: 0.05258930132595755\r\n", "have: 0.05230849466451879\r\n", "an: 0.05134422855448481\r\n", "had: 0.0492684488154792\r\n", "they: 0.04637008235395681\r\n", "you: 0.04402749402632537\r\n", "were: 0.04276089871269687\r\n", "their: 0.04054786686575204\r\n", "one: 0.04047249802257234\r\n", "all: 0.03884877562812043\r\n", "we: 0.038745717586015346\r\n", "can: 0.03136533521681986\r\n", "her: 0.030760158175550705\r\n", "has: 0.030738741778419114\r\n", "there: 0.030579718640487214\r\n", "been: 0.030492046052201172\r\n", "if: 0.02947383363580643\r\n", "more: 0.02913993510155136\r\n", "when: 0.028621786508732077\r\n", "will: 0.027970210343521484\r\n", "would: 0.027737995352408777\r\n", "who: 0.027583364124935734\r\n", "so: 0.027292271716854024\r\n", "no: 0.026378339592347637\r\n", "she: 0.026229155962555734\r\n", "other: 0.02605004214795734\r\n", "its: 0.025791098310487093\r\n", "may: 0.024823247338712433\r\n", "these: 0.024592376524793656\r\n", "what: 0.023024438161892057\r\n", "them: 0.022593183584736543\r\n", "than: 0.022317913489945242\r\n", "some: 0.0222670178925261\r\n", "him: 0.022237868126014772\r\n", "time: 0.021993444988117696\r\n", "into: 0.021549624730973625\r\n", "only: 0.02138024141681475\r\n", "do: 0.02102932980892539\r\n", "such: 0.020718046342172426\r\n", "my: 0.020382891620353113\r\n", "new: 0.020213098529147426\r\n", "about: 0.020168238056054912\r\n", "out: 0.019741221482839893\r\n", "also: 0.019560663184376402\r\n", "two: 0.01947524028300076\r\n", "any: 0.018525354639067636\r\n", "up: 0.018370148352657763\r\n", "first: 0.017386087400953678\r\n", "could: 0.01670314922311412\r\n", "our: 0.0164785217932234\r\n", "then: 0.016437627821823616\r\n", "most: 0.016214870058563365\r\n", "see: 0.01594232884762976\r\n", "me: 0.015435231089363623\r\n", "should: 0.01533498916741005\r\n", "after: 0.01510853565136897\r\n", "said: 0.01427563041209887\r\n", "your: 0.014139523826684496\r\n", "very: 0.0140661834533368\r\n", "between: 0.014013253547732668\r\n", "made: 0.013892581759636863\r\n", "many: 0.013798735508557591\r\n", "over: 0.013475618302947953\r\n", "like: 0.013368866956403618\r\n", "those: 0.01302645425780571\r\n", "did: 0.012991518269139402\r\n", "now: 0.012794211882934108\r\n", "even: 0.012759750495866282\r\n", "well: 0.012720347150153674\r\n", "where: 0.012649326373285173\r\n", "must: 0.01248641745359908\r\n", "people: 0.01240586091361361\r\n", "through: 0.0121869298329085\r\n", "how: 0.011866343233155413\r\n" ]
} ],
"execution_count" : 9
} ],
"metadata" : {
"kernelspec" : {
"display_name" : "Kotlin",
"language" : "kotlin",
"name" : "kotlin"
},
"language_info" : {
"name" : "kotlin",
"version" : "1.9.23",
"mimetype" : "text/x-kotlin",
"file_extension" : ".kt",
"pygments_lexer" : "kotlin",
"codemirror_mode" : "text/x-kotlin",
"nbconvert_exporter" : ""
}
},
"nbformat" : 4,
"nbformat_minor" : 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment