Skip to content

Instantly share code, notes, and snippets.

@forstater
Created May 25, 2016 19:35
Show Gist options
  • Save forstater/6632e12715dcc9c5fb475d4a49dbe937 to your computer and use it in GitHub Desktop.
Save forstater/6632e12715dcc9c5fb475d4a49dbe937 to your computer and use it in GitHub Desktop.
Script to calculate optimal bin sizing using minimization of integrated square area
"""
Script to calculate optimal bin size (bandwidth).
:Created: 5/25/2016
:Author: Jacob Forstater <[email protected]>
:Reference: Shimazaki and Shinomoto. Histogram Binwidth Optimization Method. Neural Comput. v19 pp1503-1527 (2007)
:Original Source: Modified from http://toyoizumilab.brain.riken.jp/hideaki/res/histogram.html
:ChangeLog:
.. line-block::
05/25/16 JF Original Version
:Parameters:
----------
dbA, dbB : lists
list containing path to individual databases
myQuery : string
String defining database query
minBD,maxBD: float
Minimum and Maximum BlockDepth to filter data.
Default should be 0 to 1.
nmin, nmax: integer
Minimimum and maximum number of bins to try (note all integers in range (nmin,nmax) will be tried)
"""
## ADDITIONAL IMPORTED MODULES GO HERE###
from numpy import array, arange, argmin, sum, mean, var, size, zeros,\
where, histogram
from numpy.random import normal
from matplotlib.pyplot import figure, plot, hist, bar, xlabel, ylabel,\
title, show, savefig
# Set Parameters for analysis here ###
nmin = 30 # min number of bins
nmax = 1000 #max number of bins
minBD = 0 #minimum blockdepth to allow
maxBD = 1 #max blockdepth to allow
#define query
myQuery="select BlockDepth from metadata where ProcessingStatus='normal' and ResTime> 0.02"
#Define lists of databases you want to query here. Path's are relative to your working directory:
dbA=[
"Jacob_Redone/800mV_adept.sqlite",
"Jacob_Redone/800mV_cusumplus.sqlite" ]
dbB=["Jacob_Redone/400mV_adept.sqlite",
"Jacob_Redone/400mV_cusumplus.sqlite"]
#concatenate all of our db's together (in case you have multiple lists of databases)
dbList=dbA+dbB
#zip dogether dB and run.
def binSizeCalc(dbList,myQuery="select BlockDepth from metadata where ProcessingStatus='normal' and ResTime> 0.02",nmin=30,nmax=1000,minBD=0,maxBD=1):
"""
Parameters
--------------------
dbList : list
List of databases
myQuery : string
String defining database query
nmin, nmax: integer
Minimimum and maximum number of bins to try (note all integers in range (nmin,nmax) will be tried)
Default 30,1000
minBD,maxBD: float
Minimum and Maximum BlockDepth to filter data.
Default 0,1
Returns:
--------------------
[cmin, N[idx], optD] : list
List containing: number of minimum cost found, optimal number of bins, and optimal bin spacing
"""
for myDB in dbList:
data=np.concatenate(np.hstack(query(myDB,myQuery)))
x =filter(lambda x: x>minBD and x<=maxBD,data) # range of BD to use
x_max = max(x)
x_min = min(x)
N_MIN = nmin #Minimum number of bins (integer)
N_MAX = nmax #Maximum number of bins (integer)
N = arange(N_MIN, N_MAX) # #of Bins
D = (x_max - x_min) / N #Bin size vector
C = zeros(shape=(size(D), 1))
C = zeros(size(D))
#Computation of the cost function
for i in xrange(size(N)):
ki = histogram(x, bins=N[i])
ki = ki[0]
k = mean(ki) #Mean of event count
v = var(ki) #Variance of event count
C[i] = (2 * k - v) / (D[i]**2) #The cost Function
#Optimal Bin Size Selection
idx = argmin(C)
cmin = C[idx]
optD = D[idx]
print '#', cmin, N[idx], optD
fig = figure()
if 0: # Two ways of plotting histogram.
# http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hist
hist(x, bins=N[idx])
else:
# http://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html
counts, bins = histogram(x, bins=N[idx])
bar(bins[:-1], counts, width=optD)
title(str(myDB)+str(myQuery))
ylabel("Frequency")
xlabel("Value")
show()#savefig('Hist.png')
fig = figure()
plot(D, C, '.b', optD, cmin, '*r')
show()#savefig('Fobj.png')
return [cmin,N[idx],optD]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment