forstater · May 25, 2016 19:35
diff --git a/calcBinSize.py b/calcBinSize.py
 """
    Script to calculate optimal bin size (bandwidth). 

    :Created:   5/25/2016
    :Author:    Jacob Forstater <[email protected]>
    :Reference:   Shimazaki and Shinomoto. Histogram Binwidth Optimization Method. Neural Comput. v19 pp1503-1527 (2007)
    :Original Source: Modified from http://toyoizumilab.brain.riken.jp/hideaki/res/histogram.html
 
    :ChangeLog:
    .. line-block::
        05/25/16    JF  Original Version



    :Parameters:
        ----------
        dbA, dbB : lists
            list containing path to individual databases

        myQuery : string
            String defining database query

        minBD,maxBD: float
            Minimum and Maximum BlockDepth to filter data. 
            Default should be 0 to 1.
      
        nmin, nmax: integer
            Minimimum and maximum number of bins to try (note all integers in range (nmin,nmax) will be tried)


 """


 ## ADDITIONAL IMPORTED MODULES GO HERE###
 from numpy import array, arange, argmin, sum, mean, var, size, zeros,\
 	where, histogram
 from numpy.random import normal
 from matplotlib.pyplot import figure, plot, hist, bar, xlabel, ylabel,\
    title, show, savefig

 # Set Parameters for analysis here ###

 nmin = 30   # min number of bins
 nmax = 1000 #max number of bins
 minBD = 0 #minimum blockdepth to allow
 maxBD = 1 #max blockdepth to allow

 #define query

 myQuery="select BlockDepth from metadata where ProcessingStatus='normal' and ResTime> 0.02"




 #Define lists of databases you want to query here. Path's are relative to your working directory:

 dbA=[
        "Jacob_Redone/800mV_adept.sqlite",
        "Jacob_Redone/800mV_cusumplus.sqlite" ]
        
 dbB=["Jacob_Redone/400mV_adept.sqlite",
        "Jacob_Redone/400mV_cusumplus.sqlite"]
        
      
 #concatenate all of our db's together (in case you have multiple lists of databases)
 dbList=dbA+dbB


 #zip dogether dB and run.

 def binSizeCalc(dbList,myQuery="select BlockDepth from metadata where ProcessingStatus='normal' and ResTime> 0.02",nmin=30,nmax=1000,minBD=0,maxBD=1):
 """

    Parameters
    --------------------
        dbList  : list
            List of databases

        myQuery : string
            String defining database query


        nmin, nmax: integer
            Minimimum and maximum number of bins to try (note all integers in range (nmin,nmax) will be tried)
            Default 30,1000

        minBD,maxBD: float
            Minimum and Maximum BlockDepth to filter data. 
            Default 0,1



    Returns:
    --------------------

        [cmin, N[idx], optD] : list
            List containing: number of minimum cost found, optimal number of bins, and optimal bin spacing

 """
    for myDB in dbList:
        data=np.concatenate(np.hstack(query(myDB,myQuery)))

        x =filter(lambda x: x>minBD and x<=maxBD,data)   # range of BD to use 
        x_max = max(x)
        x_min = min(x)
        N_MIN = nmin   #Minimum number of bins (integer)
        N_MAX = nmax  #Maximum number of bins (integer)
        N = arange(N_MIN, N_MAX) # #of Bins
        D = (x_max - x_min) / N    #Bin size vector
        C = zeros(shape=(size(D), 1))
        C = zeros(size(D))
        
        #Computation of the cost function
        for i in xrange(size(N)):
        	ki = histogram(x, bins=N[i])
        	ki = ki[0]
        	k = mean(ki) #Mean of event count
        	v = var(ki)  #Variance of event count
        	C[i] = (2 * k - v) / (D[i]**2) #The cost Function
        #Optimal Bin Size Selection
        
        idx = argmin(C)
        cmin = C[idx]
        optD = D[idx]
        
        print '#', cmin, N[idx], optD
        
        fig = figure()
        if 0: # Two ways of plotting histogram.
        	# http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hist
        	hist(x, bins=N[idx])
        else:
        	# http://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html
        	counts, bins = histogram(x, bins=N[idx])
        	bar(bins[:-1], counts, width=optD)
        title(str(myDB)+str(myQuery))
        ylabel("Frequency")
        xlabel("Value")
        show()#savefig('Hist.png')         
        fig = figure()
        plot(D, C, '.b', optD, cmin, '*r')
        show()#savefig('Fobj.png')

        return [cmin,N[idx],optD]
	"""
	Script to calculate optimal bin size (bandwidth).

	:Created: 5/25/2016
	:Author: Jacob Forstater <[email protected]>
	:Reference: Shimazaki and Shinomoto. Histogram Binwidth Optimization Method. Neural Comput. v19 pp1503-1527 (2007)
	:Original Source: Modified from http://toyoizumilab.brain.riken.jp/hideaki/res/histogram.html

	:ChangeLog:
	.. line-block::
	05/25/16 JF Original Version



	:Parameters:
	----------
	dbA, dbB : lists
	list containing path to individual databases

	myQuery : string
	String defining database query

	minBD,maxBD: float
	Minimum and Maximum BlockDepth to filter data.
	Default should be 0 to 1.

	nmin, nmax: integer
	Minimimum and maximum number of bins to try (note all integers in range (nmin,nmax) will be tried)


	"""


	## ADDITIONAL IMPORTED MODULES GO HERE###
	from numpy import array, arange, argmin, sum, mean, var, size, zeros,\
	where, histogram
	from numpy.random import normal
	from matplotlib.pyplot import figure, plot, hist, bar, xlabel, ylabel,\
	title, show, savefig

	# Set Parameters for analysis here ###

	nmin = 30 # min number of bins
	nmax = 1000 #max number of bins
	minBD = 0 #minimum blockdepth to allow
	maxBD = 1 #max blockdepth to allow

	#define query

	myQuery="select BlockDepth from metadata where ProcessingStatus='normal' and ResTime> 0.02"




	#Define lists of databases you want to query here. Path's are relative to your working directory:

	dbA=[
	"Jacob_Redone/800mV_adept.sqlite",
	"Jacob_Redone/800mV_cusumplus.sqlite" ]

	dbB=["Jacob_Redone/400mV_adept.sqlite",
	"Jacob_Redone/400mV_cusumplus.sqlite"]


	#concatenate all of our db's together (in case you have multiple lists of databases)
	dbList=dbA+dbB


	#zip dogether dB and run.

	def binSizeCalc(dbList,myQuery="select BlockDepth from metadata where ProcessingStatus='normal' and ResTime> 0.02",nmin=30,nmax=1000,minBD=0,maxBD=1):
	"""

	Parameters
	--------------------
	dbList : list
	List of databases

	myQuery : string
	String defining database query


	nmin, nmax: integer
	Minimimum and maximum number of bins to try (note all integers in range (nmin,nmax) will be tried)
	Default 30,1000

	minBD,maxBD: float
	Minimum and Maximum BlockDepth to filter data.
	Default 0,1



	Returns:
	--------------------

	[cmin, N[idx], optD] : list
	List containing: number of minimum cost found, optimal number of bins, and optimal bin spacing

	"""
	for myDB in dbList:
	data=np.concatenate(np.hstack(query(myDB,myQuery)))

	x =filter(lambda x: x>minBD and x<=maxBD,data) # range of BD to use
	x_max = max(x)
	x_min = min(x)
	N_MIN = nmin #Minimum number of bins (integer)
	N_MAX = nmax #Maximum number of bins (integer)
	N = arange(N_MIN, N_MAX) # #of Bins
	D = (x_max - x_min) / N #Bin size vector
	C = zeros(shape=(size(D), 1))
	C = zeros(size(D))

	#Computation of the cost function
	for i in xrange(size(N)):
	ki = histogram(x, bins=N[i])
	ki = ki[0]
	k = mean(ki) #Mean of event count
	v = var(ki) #Variance of event count
	C[i] = (2 * k - v) / (D[i]**2) #The cost Function
	#Optimal Bin Size Selection

	idx = argmin(C)
	cmin = C[idx]
	optD = D[idx]

	print '#', cmin, N[idx], optD

	fig = figure()
	if 0: # Two ways of plotting histogram.
	# http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hist
	hist(x, bins=N[idx])
	else:
	# http://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html
	counts, bins = histogram(x, bins=N[idx])
	bar(bins[:-1], counts, width=optD)
	title(str(myDB)+str(myQuery))
	ylabel("Frequency")
	xlabel("Value")
	show()#savefig('Hist.png')
	fig = figure()
	plot(D, C, '.b', optD, cmin, '*r')
	show()#savefig('Fobj.png')

	return [cmin,N[idx],optD]