Last active
August 9, 2016 23:30
-
-
Save forstater/95424aef366afe9ce97e91ea0420ee91 to your computer and use it in GitHub Desktop.
Useful Functions for Analyzing Nanopore Data with MOSAIC
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
:Author: Jacob Forstater <[email protected]> or <[email protected]> | |
:Description: Various functions/workflow that help to process nanopore data | |
Log: | |
JF 20160809 initial public commit | |
''' | |
import sys | |
import os | |
import glob | |
import time | |
import signal | |
import resource | |
import csv | |
import numpy as np | |
import shutil | |
import matplotlib.pyplot as plt | |
import json | |
import mosaic.utilities.ionic_current_stats as micurr | |
import mosaic.qdfTrajIO as qdf | |
import mosaic.settings as sett | |
#Query Multistate events BD and Staterest time and filter by staterestime, filter StateResTime. | |
def multistateFilter(myDB, myQueryArgs="NStates>=2 and NStates <6 and ResTime> 0.02", minStateLength=0.02): | |
#Takes database, filters (default above) and returns 2d array of BD, StateResTime | |
tempData=pd.DataFrame(query(myDB,"select BlockDepth, StateResTime from metadata where ProcessingStatus='normal' and "+str(myQueryArgs))) | |
tempData=tempData.rename(columns={0:'BD',1:'StateResTime'}) | |
minLenFilter=lambda x: np.min(x)>=minStateLength | |
bdReturn=np.hstack(tempData[tempData.StateResTime.map(minLenFilter)].BD) | |
stateResReturn=np.hstack(tempData[tempData.StateResTime.map(minLenFilter)].StateResTime) | |
return np.stack([bdReturn,stateResReturn],axis=-1) | |
def RTBDQuery(myDB,myQueryArgs,minStateLength=0.02): | |
''' | |
:Description: Query ResTime, BlockDepth | |
:Args: | |
myDB DB of interest | |
myQueryArgs Specifications for query (further restrictions) | |
minStateLength Minimum length (in ms) of States | |
xLims Range of x axis to plot | |
yLims Range of y axis to plot | |
LogYScale Log y scale? (Default False) | |
:Return: | |
a filtered BD array | |
b filtered staterestime array | |
''' | |
linkPlot=pd.DataFrame(query(myDB,"select BlockDepth, StateResTime from metadata where ProcessingStatus='normal' and "+str(myQueryArgs))) | |
linkPlot=linkPlot.rename(columns={0:'BD',1:'StateResTime'}) | |
func2=lambda x: np.min(x)>minStateLength | |
a=np.hstack(linkPlot[linkPlot.StateResTime.map(func2)].BD) | |
b=np.hstack(linkPlot[linkPlot.StateResTime.map(func2)].StateResTime) | |
return a,b | |
def RTvsBDPlot(myDB,myQueryArgs,minStateLength=0.02,xLims=(0,1),yLims=(0,100),LogYScale=False): | |
''' | |
:Description: Plot ResTime vs. BlockDepth | |
:Args: | |
myDB DB of interest | |
myQueryArgs Specifications for query (further restrictions) | |
minStateLength Minimum length (in ms) of States | |
xLims Range of x axis to plot | |
yLims Range of y axis to plot | |
LogYScale Log y scale? (Default False) | |
''' | |
a,b=RTBDQuery(myDB,myQueryArgs,minStateLength=minStateLength) | |
if LogYScale: | |
sns.jointplot(a,np.log10(b),xlim=xLims,ylim=yLims).set_axis_labels("BD","log(RT)") | |
else: | |
sns.jointplot(a,b,xlim=xLims,ylim=yLims).set_axis_labels("BD","RT") | |
def dirListing(dir): | |
''' | |
:Description: List directories containing qdf files, ignoring .DS_STORE folders | |
''' | |
dirnames = [dir+name+'/' for name in os.listdir(dir) | |
if not name.endswith('.qdf') or not name.endswith('.DS_STORE/') | |
] | |
return dirnames | |
def calcCurrentStats(dat): | |
blockstart=micurr.OpenCurrentDist(dat,.5) | |
return [blockstart[0],blockstart[1]] | |
def dirList2(dir): | |
""" | |
Returns all folders containing qdf files | |
""" | |
l = [] | |
for root, dirs, files in os.walk(dir): | |
if any(file.endswith('.qdf') for file in files): | |
l.append(os.path.abspath(root)) | |
return l | |
def plotDirectory(fileDir,Rfb=9.068E9,Cfb=1.064e-12,decimationFactor=1,st=0,ymin=-150,ymax=150): | |
""" | |
Generate time-series plot of each data set in a folder. | |
:Args: | |
- `fileDir` : directory containing data files | |
- `chNum' : Channel Number | |
- `decimationFactor` : How many points to skip | |
- `st`: | |
""" | |
# ChRfb=eval('Ch'+str(chNum)+'Rfb') | |
# ChCfb=eval('Ch'+str(chNum)+'Cfb') | |
mydirList=dirList2(fileDir) | |
fig, ax = plt.subplots(nrows=len(mydirList), ncols=1, figsize=(12,1.25*len(mydirList)), sharey=True,sharex=True) | |
j=0 | |
for d in mydirList: | |
q=qdf.qdfTrajIO(dirname=d+'', filter='*.qdf', Rfb=Rfb, Cfb=Cfb) | |
toplot=q.popdata(2*q.FsHz)[::decimationFactor] | |
ax[j].plot( np.arange(0+q.start, (decimationFactor*toplot.size)/q.FsHz+q.start, decimationFactor/float(q.FsHz)), | |
toplot) | |
ax[j].set_title(str(d)) | |
j+=1 | |
plt.ylim(ymin,ymax) | |
plt.show() | |
#*************************************** Functions related to orbit 16 ******************************* | |
#Dictionary of Orbit Amplifier Rfb and Cfb values | |
orbitFBrc = { | |
'Ch1Rfb' : 10.88E9, | |
'Ch1Cfb' : 0.790E-12, | |
'Ch2Rfb' : 10.1E9, | |
'Ch2Cfb' : 0.799E-12, | |
'Ch3Rfb' : 10.1E9, #VerifyCh3# | |
'Ch3Cfb' : 0.799E-12, | |
'Ch4Rfb' : 9.19E9, | |
'Ch4Cfb' : 0.795E-12, | |
'Ch5Rfb' : 10.09E9, | |
'Ch5Cfb' : 0.813E-12, | |
'Ch6Rfb' : 10.55E9, | |
'Ch6Cfb' : 0.811E-12, | |
'Ch7Rfb' : 9.840E9, | |
'Ch7Cfb' : 0.788E-12, | |
'Ch8Rfb' : 10.84E9, | |
'Ch8Cfb' : 0.789E-12 | |
} | |
def orbitTraverseDirs(dir): | |
# Function traverses directory dir and returns list of folder lists with qdf files in them for each Ch. | |
# I have shifted the folder list by 1 position so channel number correctly corresponds with index. (i.e. ch1 is returned in [1] position) | |
l = [] | |
filtList=[] | |
for root, dirs, files in os.walk(dir): | |
if any(file.endswith('.qdf') for file in files): | |
l.append(os.path.abspath(root)) | |
# print(l) | |
#now filter down to specifics channels and separate into list for user | |
for i in range(0,8): | |
filtList.append([s for s in l if "/Ch"+str(i) in s and "PSD" not in s]) | |
return filtList | |
# Routine for Examining all the folders for a given chanel | |
def plotAllFoldersRestricRange(fileDir,chNum,decimationFactor=1,st=0,lastFile=None,ymin=-150,ymax=150): | |
""" | |
Generate time-series plot of each data set in a folder. | |
:Args: | |
- `fileDir` : directory containing data files | |
- `chNum' : Channel Number | |
- `decimationFactor` : How many points to skip | |
- `st`:%%! | |
""" | |
if lastFile is None: | |
lastFile=len(orbitTraverseDirs(fileDir)[chNum]) | |
ChRfb=orbitFBrc['Ch'+str(chNum)+'Rfb'] | |
ChCfb=orbitFBrc['Ch'+str(chNum)+'Cfb'] | |
fig, ax = plt.subplots(nrows=lastFile, ncols=1, figsize=(12,1.25*lastFile), sharey=True,sharex=True) | |
for i in range(0,lastFile): | |
q=qdf.qdfTrajIO(dirname=orbitTraverseDirs(fileDir)[chNum][i]+'/', filter='*.qdf', Start=st,Rfb=ChRfb, Cfb=ChCfb) | |
toplot=q.popdata(2*q.FsHz)[::decimationFactor] | |
ax[i].plot( np.arange(0+q.start, (decimationFactor*toplot.size)/q.FsHz+q.start, decimationFactor/float(q.FsHz)), | |
toplot) | |
ax[i].set_title(str(orbitTraverseDirs(fileDir)[chNum][i])) | |
plt.ylim(ymin,ymax) | |
plt.show() | |
def plotAllFolders(fileDir,chNum,decimationFactor,st): | |
""" | |
Generate time-series plot of each data set in a folder. | |
:Args: | |
- `fileDir` : directory containing data files | |
- `chNum' : Channel Number | |
- `decimationFactor` : How many points to skip | |
- `st`:%%! | |
""" | |
ChRfb=orbitFBrc['Ch'+str(chNum)+'Rfb'] | |
ChCfb=orbitFBrc['Ch'+str(chNum)+'Cfb'] | |
fig, ax = plt.subplots(nrows=len(orbitTraverseDirs(fileDir)[chNum]), ncols=1, figsize=(12, 12), sharey=True,sharex=True) | |
for i in range(0,len(orbitTraverseDirs(fileDir)[chNum])): | |
q=qdf.qdfTrajIO(dirname=orbitTraverseDirs(fileDir)[chNum][i]+'/', filter='*.qdf', Start=st,Rfb=ChRfb, Cfb=ChCfb) | |
toplot=q.popdata(2*q.FsHz)[::decimationFactor] | |
ax[i].plot( np.arange(0+q.start, (decimationFactor*toplot.size)/q.FsHz+q.start, decimationFactor/float(q.FsHz)), | |
toplot) | |
ax[i].set_title(str(orbitTraverseDirs(fileDir)[chNum][i])) | |
plt.ylim(-150,150) | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment