Created
September 16, 2022 20:30
-
-
Save terrycojones/75b8c403d6de0ebb32e68003dbb67eff to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""Given category frequencies on the command line, print the entropy (base 2).

E.g., if there are 12 things in the first category, 4 in the second,
and 5 in the third, run

    $ entropy.py 12 4 5
    1.4100
"""
import sys | |
from math import log | |
from sklearn.metrics.cluster import entropy | |
def entropy_bits(counts):
    """Return the Shannon entropy, in bits, of a sequence of category counts.

    The entropy is computed straight from the counts, so the cost depends on
    the number of categories and not on how many items were counted.

    Args:
        counts: Non-negative integers, one per category.

    Returns:
        The entropy (base 2) as a non-negative float.

    Raises:
        ValueError: If any count is negative, or if no count is positive
            (there is then no distribution to measure).
    """
    if any(count < 0 for count in counts):
        raise ValueError('counts must be non-negative.')

    total = sum(counts)
    if total == 0:
        raise ValueError('at least one count must be positive.')

    log_total = log(total)
    # Empty categories are skipped: they contribute nothing to the entropy
    # and log(0) is undefined.  Each term is written as
    # p * (log(total) - log(count)), which is never negative, so a single
    # non-empty category gives exactly 0.0 rather than -0.0.
    nats = sum(
        count / total * (log_total - log(count))
        for count in counts
        if count
    )
    # Convert from natural-log units to bits.
    return nats / log(2.0)


def main(argv=None):
    """Run the command-line interface and return the process exit status.

    Args:
        argv: Argument list including the program name; defaults to
            ``sys.argv``.

    Returns:
        0 on success, 1 when no frequencies were given, 2 when the
        frequencies are not integers or do not describe a distribution.
    """
    argv = sys.argv if argv is None else argv
    prog = argv[0] if argv else 'entropy.py'

    if len(argv) < 2:
        print(f'Usage: {prog} freq1 [freq2 ...]', file=sys.stderr)
        return 1

    try:
        counts = [int(arg) for arg in argv[1:]]
    except ValueError:
        print(f'{prog}: counts must be integers.', file=sys.stderr)
        return 2

    try:
        bits = entropy_bits(counts)
    except ValueError as error:
        print(f'{prog}: {error}', file=sys.stderr)
        return 2

    print(f'{bits:.4f}')
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment