Four different ways to calculate entropy in Python
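All four functions compute the Shannon entropy of the empirical label distribution, H = -Σ_i p_i log_b(p_i), where p_i is the observed frequency of label i and b is the logarithm base (natural log when base is None, so the result is in nats).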
import numpy as np
from scipy.stats import entropy
from math import log, e
import pandas as pd
import timeit
def entropy1(labels, base=None):
    """Entropy via scipy.stats.entropy, which normalizes the counts itself."""
    _, counts = np.unique(labels, return_counts=True)
    return entropy(counts, base=base)
def entropy2(labels, base=None):
    """Computes entropy of label distribution with an explicit loop."""
    n_labels = len(labels)
    if n_labels <= 1:
        return 0
    _, counts = np.unique(labels, return_counts=True)
    probs = counts / n_labels
    n_classes = np.count_nonzero(probs)
    if n_classes <= 1:
        return 0
    ent = 0.
    # Compute entropy one probability at a time
    base = e if base is None else base
    for p in probs:
        ent -= p * log(p, base)
    return ent
def entropy3(labels, base=None):
    """Entropy via pandas value_counts, which normalizes to probabilities."""
    vc = pd.Series(labels).value_counts(normalize=True, sort=False)
    base = e if base is None else base
    return -(vc * np.log(vc) / np.log(base)).sum()
def entropy4(labels, base=None):
    """Entropy via vectorized NumPy, using the change-of-base identity."""
    _, counts = np.unique(labels, return_counts=True)
    norm_counts = counts / counts.sum()
    base = e if base is None else base
    return -(norm_counts * np.log(norm_counts) / np.log(base)).sum()
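
A quick consistency check, not part of the original gist, confirms the four implementations agree to floating-point tolerance (a minimal sketch; the helper name is hypothetical):

# Hypothetical helper added for illustration: verifies all four
# implementations return the same value for a given label sequence.
def check_agreement(labels):
    results = [f(labels) for f in (entropy1, entropy2, entropy3, entropy4)]
    assert np.allclose(results, results[0])
    return results[0]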
labels = [1, 3, 5, 2, 3, 5, 3, 2, 1, 3, 4, 5]

# Each call prints the same value, ~1.5171 nats for this label list.
print(entropy1(labels))
print(entropy2(labels))
print(entropy3(labels))
print(entropy4(labels))
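
The timeit import above is never exercised in the gist as shown; here is a minimal benchmarking sketch of how the four versions might be compared (an assumption about the intended use, with an illustrative call count):

# Benchmarking sketch (assumption: not part of the original gist's output).
number = 10_000
for fn in (entropy1, entropy2, entropy3, entropy4):
    secs = timeit.timeit(lambda: fn(labels), number=number)
    print(f"{fn.__name__}: {secs:.3f}s for {number} calls")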