Skip to content

Instantly share code, notes, and snippets.

@forstater
Last active August 9, 2016 23:30
Show Gist options
  • Save forstater/95424aef366afe9ce97e91ea0420ee91 to your computer and use it in GitHub Desktop.
Save forstater/95424aef366afe9ce97e91ea0420ee91 to your computer and use it in GitHub Desktop.
Useful Functions for Analyzing Nanopore Data with MOSAIC
'''
:Author: Jacob Forstater <[email protected]> or <[email protected]>
:Description: Various functions/workflow that help to process nanopore data
Log:
JF 20160809 initial public commit
'''
import sys
import os
import glob
import time
import signal
import resource
import csv
import numpy as np
import shutil
import matplotlib.pyplot as plt
import json
import mosaic.utilities.ionic_current_stats as micurr
import mosaic.qdfTrajIO as qdf
import mosaic.settings as sett
#Query Multistate events BD and Staterest time and filter by staterestime, filter StateResTime.
def multistateFilter(myDB, myQueryArgs="NStates>=2 and NStates <6 and ResTime> 0.02", minStateLength=0.02):
#Takes database, filters (default above) and returns 2d array of BD, StateResTime
tempData=pd.DataFrame(query(myDB,"select BlockDepth, StateResTime from metadata where ProcessingStatus='normal' and "+str(myQueryArgs)))
tempData=tempData.rename(columns={0:'BD',1:'StateResTime'})
minLenFilter=lambda x: np.min(x)>=minStateLength
bdReturn=np.hstack(tempData[tempData.StateResTime.map(minLenFilter)].BD)
stateResReturn=np.hstack(tempData[tempData.StateResTime.map(minLenFilter)].StateResTime)
return np.stack([bdReturn,stateResReturn],axis=-1)
def RTBDQuery(myDB,myQueryArgs,minStateLength=0.02):
'''
:Description: Query ResTime, BlockDepth
:Args:
myDB DB of interest
myQueryArgs Specifications for query (further restrictions)
minStateLength Minimum length (in ms) of States
xLims Range of x axis to plot
yLims Range of y axis to plot
LogYScale Log y scale? (Default False)
:Return:
a filtered BD array
b filtered staterestime array
'''
linkPlot=pd.DataFrame(query(myDB,"select BlockDepth, StateResTime from metadata where ProcessingStatus='normal' and "+str(myQueryArgs)))
linkPlot=linkPlot.rename(columns={0:'BD',1:'StateResTime'})
func2=lambda x: np.min(x)>minStateLength
a=np.hstack(linkPlot[linkPlot.StateResTime.map(func2)].BD)
b=np.hstack(linkPlot[linkPlot.StateResTime.map(func2)].StateResTime)
return a,b
def RTvsBDPlot(myDB,myQueryArgs,minStateLength=0.02,xLims=(0,1),yLims=(0,100),LogYScale=False):
'''
:Description: Plot ResTime vs. BlockDepth
:Args:
myDB DB of interest
myQueryArgs Specifications for query (further restrictions)
minStateLength Minimum length (in ms) of States
xLims Range of x axis to plot
yLims Range of y axis to plot
LogYScale Log y scale? (Default False)
'''
a,b=RTBDQuery(myDB,myQueryArgs,minStateLength=minStateLength)
if LogYScale:
sns.jointplot(a,np.log10(b),xlim=xLims,ylim=yLims).set_axis_labels("BD","log(RT)")
else:
sns.jointplot(a,b,xlim=xLims,ylim=yLims).set_axis_labels("BD","RT")
def dirListing(dir):
'''
:Description: List directories containing qdf files, ignoring .DS_STORE folders
'''
dirnames = [dir+name+'/' for name in os.listdir(dir)
if not name.endswith('.qdf') or not name.endswith('.DS_STORE/')
]
return dirnames
def calcCurrentStats(dat):
blockstart=micurr.OpenCurrentDist(dat,.5)
return [blockstart[0],blockstart[1]]
def dirList2(dir):
"""
Returns all folders containing qdf files
"""
l = []
for root, dirs, files in os.walk(dir):
if any(file.endswith('.qdf') for file in files):
l.append(os.path.abspath(root))
return l
def plotDirectory(fileDir,Rfb=9.068E9,Cfb=1.064e-12,decimationFactor=1,st=0,ymin=-150,ymax=150):
"""
Generate time-series plot of each data set in a folder.
:Args:
- `fileDir` : directory containing data files
- `chNum' : Channel Number
- `decimationFactor` : How many points to skip
- `st`:
"""
# ChRfb=eval('Ch'+str(chNum)+'Rfb')
# ChCfb=eval('Ch'+str(chNum)+'Cfb')
mydirList=dirList2(fileDir)
fig, ax = plt.subplots(nrows=len(mydirList), ncols=1, figsize=(12,1.25*len(mydirList)), sharey=True,sharex=True)
j=0
for d in mydirList:
q=qdf.qdfTrajIO(dirname=d+'', filter='*.qdf', Rfb=Rfb, Cfb=Cfb)
toplot=q.popdata(2*q.FsHz)[::decimationFactor]
ax[j].plot( np.arange(0+q.start, (decimationFactor*toplot.size)/q.FsHz+q.start, decimationFactor/float(q.FsHz)),
toplot)
ax[j].set_title(str(d))
j+=1
plt.ylim(ymin,ymax)
plt.show()
#*************************************** Functions related to orbit 16 *******************************
#Dictionary of Orbit Amplifier Rfb and Cfb values
orbitFBrc = {
'Ch1Rfb' : 10.88E9,
'Ch1Cfb' : 0.790E-12,
'Ch2Rfb' : 10.1E9,
'Ch2Cfb' : 0.799E-12,
'Ch3Rfb' : 10.1E9, #VerifyCh3#
'Ch3Cfb' : 0.799E-12,
'Ch4Rfb' : 9.19E9,
'Ch4Cfb' : 0.795E-12,
'Ch5Rfb' : 10.09E9,
'Ch5Cfb' : 0.813E-12,
'Ch6Rfb' : 10.55E9,
'Ch6Cfb' : 0.811E-12,
'Ch7Rfb' : 9.840E9,
'Ch7Cfb' : 0.788E-12,
'Ch8Rfb' : 10.84E9,
'Ch8Cfb' : 0.789E-12
}
def orbitTraverseDirs(dir):
# Function traverses directory dir and returns list of folder lists with qdf files in them for each Ch.
# I have shifted the folder list by 1 position so channel number correctly corresponds with index. (i.e. ch1 is returned in [1] position)
l = []
filtList=[]
for root, dirs, files in os.walk(dir):
if any(file.endswith('.qdf') for file in files):
l.append(os.path.abspath(root))
# print(l)
#now filter down to specifics channels and separate into list for user
for i in range(0,8):
filtList.append([s for s in l if "/Ch"+str(i) in s and "PSD" not in s])
return filtList
# Routine for Examining all the folders for a given chanel
def plotAllFoldersRestricRange(fileDir,chNum,decimationFactor=1,st=0,lastFile=None,ymin=-150,ymax=150):
"""
Generate time-series plot of each data set in a folder.
:Args:
- `fileDir` : directory containing data files
- `chNum' : Channel Number
- `decimationFactor` : How many points to skip
- `st`:%%!
"""
if lastFile is None:
lastFile=len(orbitTraverseDirs(fileDir)[chNum])
ChRfb=orbitFBrc['Ch'+str(chNum)+'Rfb']
ChCfb=orbitFBrc['Ch'+str(chNum)+'Cfb']
fig, ax = plt.subplots(nrows=lastFile, ncols=1, figsize=(12,1.25*lastFile), sharey=True,sharex=True)
for i in range(0,lastFile):
q=qdf.qdfTrajIO(dirname=orbitTraverseDirs(fileDir)[chNum][i]+'/', filter='*.qdf', Start=st,Rfb=ChRfb, Cfb=ChCfb)
toplot=q.popdata(2*q.FsHz)[::decimationFactor]
ax[i].plot( np.arange(0+q.start, (decimationFactor*toplot.size)/q.FsHz+q.start, decimationFactor/float(q.FsHz)),
toplot)
ax[i].set_title(str(orbitTraverseDirs(fileDir)[chNum][i]))
plt.ylim(ymin,ymax)
plt.show()
def plotAllFolders(fileDir,chNum,decimationFactor,st):
"""
Generate time-series plot of each data set in a folder.
:Args:
- `fileDir` : directory containing data files
- `chNum' : Channel Number
- `decimationFactor` : How many points to skip
- `st`:%%!
"""
ChRfb=orbitFBrc['Ch'+str(chNum)+'Rfb']
ChCfb=orbitFBrc['Ch'+str(chNum)+'Cfb']
fig, ax = plt.subplots(nrows=len(orbitTraverseDirs(fileDir)[chNum]), ncols=1, figsize=(12, 12), sharey=True,sharex=True)
for i in range(0,len(orbitTraverseDirs(fileDir)[chNum])):
q=qdf.qdfTrajIO(dirname=orbitTraverseDirs(fileDir)[chNum][i]+'/', filter='*.qdf', Start=st,Rfb=ChRfb, Cfb=ChCfb)
toplot=q.popdata(2*q.FsHz)[::decimationFactor]
ax[i].plot( np.arange(0+q.start, (decimationFactor*toplot.size)/q.FsHz+q.start, decimationFactor/float(q.FsHz)),
toplot)
ax[i].set_title(str(orbitTraverseDirs(fileDir)[chNum][i]))
plt.ylim(-150,150)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment