Skip to content

Instantly share code, notes, and snippets.

Created October 12, 2015 01:41
Show Gist options
  • Save anonymous/d9c4583b0466344ddc38 to your computer and use it in GitHub Desktop.
Save anonymous/d9c4583b0466344ddc38 to your computer and use it in GitHub Desktop.
from math import log
def calcShannonEnt(dataSet):
    """Return the Shannon entropy, in bits, of the class labels in dataSet.

    Each record in ``dataSet`` is an indexable sequence whose last element
    is treated as its class label; all other elements are ignored here.

    Args:
        dataSet: A sized collection (e.g. a list) of records.

    Returns:
        The entropy ``-sum(p * log2(p))`` taken over the relative frequency
        ``p`` of each distinct label. An empty ``dataSet`` yields ``0.0``
        because there are no labels to sum over.
    """
    numEntries = len(dataSet)

    # Tally how many records carry each class label.
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1

    shannonEnt = 0.0
    for count in labelCounts.values():
        # float() keeps this a true division even under Python 2 semantics.
        prob = float(count) / numEntries
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt
def createDataSet():
    """Return a small hard-coded sample data set and its feature names.

    Returns:
        A ``(dataSet, labels)`` tuple. ``dataSet`` is a list of five
        records, each holding two 0/1 feature values followed by a
        ``'yes'``/``'no'`` class label. ``labels`` holds the names of the
        two feature columns, in column order. Fresh list objects are built
        on every call.
    """
    dataSet = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment