Created
May 25, 2016 19:35
-
-
Save forstater/6632e12715dcc9c5fb475d4a49dbe937 to your computer and use it in GitHub Desktop.
Script to calculate optimal bin sizing using minimization of integrated square area
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Script to calculate optimal bin size (bandwidth). | |
:Created: 5/25/2016 | |
:Author: Jacob Forstater <[email protected]> | |
:Reference: Shimazaki and Shinomoto. Histogram Binwidth Optimization Method. Neural Comput. v19 pp1503-1527 (2007) | |
:Original Source: Modified from http://toyoizumilab.brain.riken.jp/hideaki/res/histogram.html | |
:ChangeLog: | |
.. line-block:: | |
05/25/16 JF Original Version | |
:Parameters: | |
---------- | |
dbA, dbB : lists | |
list containing path to individual databases | |
myQuery : string | |
String defining database query | |
minBD,maxBD: float | |
Minimum and Maximum BlockDepth to filter data. | |
Default should be 0 to 1. | |
nmin, nmax: integer | |
Minimimum and maximum number of bins to try (note all integers in range (nmin,nmax) will be tried) | |
""" | |
## ADDITIONAL IMPORTED MODULES GO HERE### | |
from numpy import array, arange, argmin, sum, mean, var, size, zeros,\ | |
where, histogram | |
from numpy.random import normal | |
from matplotlib.pyplot import figure, plot, hist, bar, xlabel, ylabel,\ | |
title, show, savefig | |
# Set Parameters for analysis here ### | |
nmin = 30 # min number of bins | |
nmax = 1000 #max number of bins | |
minBD = 0 #minimum blockdepth to allow | |
maxBD = 1 #max blockdepth to allow | |
#define query | |
myQuery="select BlockDepth from metadata where ProcessingStatus='normal' and ResTime> 0.02" | |
#Define lists of databases you want to query here. Path's are relative to your working directory: | |
dbA=[ | |
"Jacob_Redone/800mV_adept.sqlite", | |
"Jacob_Redone/800mV_cusumplus.sqlite" ] | |
dbB=["Jacob_Redone/400mV_adept.sqlite", | |
"Jacob_Redone/400mV_cusumplus.sqlite"] | |
#concatenate all of our db's together (in case you have multiple lists of databases) | |
dbList=dbA+dbB | |
#zip dogether dB and run. | |
def binSizeCalc(dbList,myQuery="select BlockDepth from metadata where ProcessingStatus='normal' and ResTime> 0.02",nmin=30,nmax=1000,minBD=0,maxBD=1): | |
""" | |
Parameters | |
-------------------- | |
dbList : list | |
List of databases | |
myQuery : string | |
String defining database query | |
nmin, nmax: integer | |
Minimimum and maximum number of bins to try (note all integers in range (nmin,nmax) will be tried) | |
Default 30,1000 | |
minBD,maxBD: float | |
Minimum and Maximum BlockDepth to filter data. | |
Default 0,1 | |
Returns: | |
-------------------- | |
[cmin, N[idx], optD] : list | |
List containing: number of minimum cost found, optimal number of bins, and optimal bin spacing | |
""" | |
for myDB in dbList: | |
data=np.concatenate(np.hstack(query(myDB,myQuery))) | |
x =filter(lambda x: x>minBD and x<=maxBD,data) # range of BD to use | |
x_max = max(x) | |
x_min = min(x) | |
N_MIN = nmin #Minimum number of bins (integer) | |
N_MAX = nmax #Maximum number of bins (integer) | |
N = arange(N_MIN, N_MAX) # #of Bins | |
D = (x_max - x_min) / N #Bin size vector | |
C = zeros(shape=(size(D), 1)) | |
C = zeros(size(D)) | |
#Computation of the cost function | |
for i in xrange(size(N)): | |
ki = histogram(x, bins=N[i]) | |
ki = ki[0] | |
k = mean(ki) #Mean of event count | |
v = var(ki) #Variance of event count | |
C[i] = (2 * k - v) / (D[i]**2) #The cost Function | |
#Optimal Bin Size Selection | |
idx = argmin(C) | |
cmin = C[idx] | |
optD = D[idx] | |
print '#', cmin, N[idx], optD | |
fig = figure() | |
if 0: # Two ways of plotting histogram. | |
# http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hist | |
hist(x, bins=N[idx]) | |
else: | |
# http://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html | |
counts, bins = histogram(x, bins=N[idx]) | |
bar(bins[:-1], counts, width=optD) | |
title(str(myDB)+str(myQuery)) | |
ylabel("Frequency") | |
xlabel("Value") | |
show()#savefig('Hist.png') | |
fig = figure() | |
plot(D, C, '.b', optD, cmin, '*r') | |
show()#savefig('Fobj.png') | |
return [cmin,N[idx],optD] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment