Created
March 30, 2022 01:17
-
-
Save thesephist/147ed8f22eef181b33333f6c4b742e75 to your computer and use it in GitHub Desktop.
Count and plot histograms of line length in source code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// codecols designed after Rasmus Andersson's linelen_hist.sh | |
// https://gist.github.com/rsms/36bda3b5c8ab83d951e45ed788a184f4 | |
{ | |
println: println | |
default: default | |
map: map | |
stdin: stdin | |
range: range | |
filter: filter | |
reduce: reduce | |
append: append | |
values: values | |
identity: identity | |
partition: partition | |
} := import('std') | |
{ | |
split: split | |
join: join | |
padEnd: padEnd | |
} := import('str') | |
sort := import('sort') | |
math := import('math') | |
cli := import('cli') | |
// Parse command-line options with libcli. Two placeholder entries are
// prepended to args() before parsing; per the original note this adjusts
// libcli for (1) `oak pack` and (2) the fact that this CLI has no verb.
// NOTE(review): presumably libcli treats the leading argv entries as the
// executable, main file and verb -- confirm against libcli's argv layout.
argv := ['_exe', '_main'] |> append(args())
Cli := cli.parseArgv(argv)

// -h / --help: print usage and exit successfully without reading stdin
if Cli.opts.h = true | Cli.opts.help = true -> {
	println('codecols counts columns in source code given to its standard input.
Usage
codecols [options] < your/code/*.c
cat *.c | codecols [options]
Options
--max-cols, -c Maximum number of columns of code to display in the
output table
--histo-width, -w If the column counts are high enough that the histogram
must be scaled down to fit on a terminal screen, the
bars will be scaled such that the longest one is this
long. 60 by default.')
	exit(0)
}
// histo renders a horizontal bar for a length n measured in eighths of a
// character cell: one full block per 8 units of n, followed by a partial
// block for whatever whole eighths remain. A positive n too small to render
// anything still yields the thinnest partial block.
fn histo(n) {
	fullBlocks := int(n / 8)
	eighths := int(n % 8)

	// partial block glyph for the remaining eighths (0 through 7)
	partial := if eighths {
		0 -> ''
		1 -> '▏'
		2 -> '▎'
		3 -> '▍'
		4 -> '▌'
		5 -> '▋'
		6 -> '▊'
		7 -> '▉'
	}
	solid := range(fullBlocks) |> map(fn '█') |> join()
	bar := solid + partial

	if {
		// never let a non-zero value disappear from the chart entirely
		bar = '' & n > 0 -> '▏'
		_ -> bar
	}
}
// cols holds one entry per non-empty line of standard input: the line's
// length as reported by len(), rounded up to the nearest even number so
// that lengths fall into 2-column-wide buckets.
// NOTE(review): len() presumably counts bytes rather than display columns,
// so tabs and multi-byte characters may be miscounted -- confirm.
cols := stdin() |>
	split('\n') |>
	filter(fn(s) s != '') |>
	// round up to the nearest even number
	map(fn(line) len(line) + len(line) % 2)

// With no non-empty lines there is nothing to tabulate, and the max/scale
// computations further down have no values to work with, so stop here with
// a clear message instead of failing later.
if len(cols) = 0 -> {
	println('codecols: no non-empty lines found on standard input')
	exit(1)
}

// freqs holds the same data as a frequency map: bucket (even column count)
// -> number of lines in that bucket. The list is sorted first and then
// partitioned by identity, which groups equal values; each group `ns`
// contributes one entry.
freqs := cols |>
	sort.sort!() |>
	partition(identity) |>
	reduce({}, fn(freq, ns) freq.(ns.0) := len(ns))
// largest column bucket that occurs in the input
max := math.max(keys(freqs) |> map(int)...)
// largest number of lines that share a single bucket
maxCount := math.max(values(freqs)...)

// Length, in character cells, of the longest histogram bar: taken from
// --histo-width (or -w), falling back to 60, and capped at maxCount.
maxHisto := int(Cli.opts.'histo-width' |> default(Cli.opts.w)) |>
	default(60) |>
	math.min(maxCount)

// Last bucket to list in the table: taken from --max-cols (or -c), falling
// back to the largest bucket present in the input.
maxListedCols := int(Cli.opts.'max-cols' |> default(Cli.opts.c)) |>
	default(max)

// Each table column is as wide as the longer of its header label and its
// widest value.
colWidth := math.max(
	len('cols')
	len(string(max))
)
countWidth := math.max(
	len('count')
	len(string(maxCount))
)
// header row, padded to line up with the table body
println(padEnd('cols', colWidth, ' '), padEnd('count', countWidth, ' '))

// One row per even bucket from 2 through maxListedCols. Buckets that never
// occurred are listed with a count of 0. The bar length rescales the count
// from [0, maxCount] to [0, maxHisto * 8], the unit histo expects.
range(2, maxListedCols + 1, 2) |> map(fn(bucket) {
	count := freqs.(bucket) |> default(0)
	barLen := math.scale(count, 0, maxCount, 0, maxHisto * 8)
	println(
		padEnd(string(bucket), colWidth, ' ')
		padEnd(string(count), countWidth, ' ')
		histo(barLen)
	)
})

// mean of the (even-rounded) per-line column counts, to 2 decimal places
println('average columns per line:', math.round(math.mean(cols), 2))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ cat ~/src/oak/**/*.oak | codecols --max-cols 100 | |
cols count | |
2 835 ███████████████████████████████████████▍ | |
4 1272 ████████████████████████████████████████████████████████████ | |
6 850 ████████████████████████████████████████ | |
8 432 ████████████████████▍ | |
10 389 ██████████████████▎ | |
12 642 ██████████████████████████████▎ | |
14 671 ███████████████████████████████▋ | |
16 660 ███████████████████████████████▏ | |
18 631 █████████████████████████████▊ | |
20 581 ███████████████████████████▍ | |
22 412 ███████████████████▍ | |
24 418 ███████████████████▋ | |
26 371 █████████████████▌ | |
28 358 ████████████████▉ | |
30 323 ███████████████▏ | |
32 250 ███████████▊ | |
34 286 █████████████▍ | |
36 227 ██████████▋ | |
38 230 ██████████▊ | |
40 175 ████████▎ | |
42 222 ██████████▍ | |
44 212 ██████████ | |
46 190 ████████▉ | |
48 133 ██████▎ | |
50 154 ███████▎ | |
52 152 ███████▏ | |
54 145 ██████▊ | |
56 126 █████▉ | |
58 100 ████▋ | |
60 113 █████▎ | |
62 96 ████▌ | |
64 98 ████▌ | |
66 95 ████▍ | |
68 107 █████ | |
70 101 ████▊ | |
72 95 ████▍ | |
74 94 ████▍ | |
76 124 █████▊ | |
78 126 █████▉ | |
80 71 ███▎ | |
82 18 ▊ | |
84 23 █ | |
86 15 ▋ | |
88 19 ▉ | |
90 16 ▊ | |
92 9 ▍ | |
94 7 ▎ | |
96 11 ▌ | |
98 9 ▍ | |
100 10 ▍ | |
average columns per line: 25.55 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment