Last active
May 13, 2016 21:14
-
-
Save ipurusho/35191ec6962b8fc9203042678a2b5561 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import os | |
import re | |
import glob | |
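# Command-line arguments:
#   sys.argv[1] = action: "merge" or "rename"
#   sys.argv[2] = top-level folder whose sub-directories are processed
#   sys.argv[3] = read tag to select, e.g. "R1" or "R2"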
def subdirectories(args):
    """Return the names of the directories found directly inside ``args``.

    Args:
        args: Path of the folder to scan (not recursive).

    Returns:
        list of entry names (not full paths), in ``os.listdir`` order, for
        which ``os.path.isdir`` is true.
    """
    found = []
    for entry in os.listdir(args):
        candidate = os.path.join(args, entry)
        if os.path.isdir(candidate):
            found.append(entry)
    return found
def findKey(input_dict, value):  # util for dict
    """Return the set of keys in ``input_dict`` whose value equals ``value``.

    Args:
        input_dict: Mapping to search.
        value: Value to compare against with ``==``.

    Returns:
        set of matching keys; empty when nothing matches.
    """
    matches = set()
    for key, candidate in input_dict.items():
        if candidate == value:
            matches.add(key)
    return matches
def rchop(thestring, ending):  # util for regex
    """Return ``thestring`` with one trailing ``ending`` removed, if present.

    Args:
        thestring: String to trim.
        ending: Suffix to strip from the end of ``thestring``.

    Returns:
        ``thestring`` without the suffix when it ends with ``ending``;
        otherwise ``thestring`` unchanged.
    """
    # An empty suffix must be a no-op: every string "ends with" "", and
    # thestring[:-0] is thestring[:0], which would wipe out the whole string.
    if ending and thestring.endswith(ending):
        return thestring[:-len(ending)]
    return thestring
def newName(oldName):  # util for regex
    """Extract the sample part of a file name.

    The name must start with one upper-case letter followed by one or more
    underscores; that prefix is dropped. From the remainder, everything up
    to (but not including) the last ``_b`` or ``_c`` is returned.

    Example: ``"A_sample1_b_L001_R1.fastq.gz"`` -> ``"sample1"``.

    Args:
        oldName: Original file name.

    Returns:
        The sample portion of ``oldName``.

    Raises:
        ValueError: If ``oldName`` lacks the leading prefix or the
            ``_b``/``_c`` marker.
    """
    prefix = re.match(r'[A-Z]_+', oldName)
    if prefix is None:
        raise ValueError(
            "file name %r does not start with '<LETTER>_'" % (oldName,))

    # Greedy ".+" backtracks to the LAST "_b"/"_c"; the capture group keeps
    # what precedes that marker, so no separate suffix-chopping is needed.
    sample = re.match(r'(.+)_[bc]', oldName[prefix.end():])
    if sample is None:
        raise ValueError(
            "file name %r has no '_b' or '_c' marker after the prefix"
            % (oldName,))
    return sample.group(1)
def sampleDict(path):
    """Map each ``.gz`` file in ``path`` to the read tag found in its name.

    Only directory entries whose final extension is ``.gz`` are examined,
    and only those whose name contains ``R1`` or ``R2`` are kept.

    Args:
        path: Directory to scan (not recursive).

    Returns:
        dict mapping file name (as returned by ``os.listdir``, i.e. without
        the directory part) to the first ``"R1"``/``"R2"`` occurrence in
        that name.
    """
    ext = '.gz'
    # Character class, not alternation: "[1|2]" would also accept a
    # literal "|" and report "R|" as a read tag.
    read_tag = re.compile(r'R[12]')

    samples = {}
    for name in os.listdir(path):
        if os.path.splitext(name)[1] != ext:
            continue
        found = read_tag.search(name)
        if found is not None:
            samples[name] = found.group(0)
    return samples
def merge(samples):  # merges files
    """Build the shell command that concatenates all files of one read.

    The read tag is taken from ``sys.argv[3]`` and the action name from
    ``sys.argv[1]``. The output file is named
    ``<sample>_<read>_<action>.fastq.gz``, where ``<sample>`` comes from
    ``newName`` applied to the first selected file.

    Args:
        samples: dict mapping file name to read tag (see ``sampleDict``).

    Returns:
        A ``cat ... > ...`` command string; it is not executed here.

    Raises:
        ValueError: If no file in ``samples`` carries the requested read
            tag (a ``cat`` without inputs would block reading stdin).
    """
    try:
        from shlex import quote  # Python 3
    except ImportError:
        from pipes import quote  # Python 2

    read = sys.argv[3]
    inputs = sorted(str(name) for name in findKey(samples, read))
    if not inputs:
        raise ValueError("no files tagged %r to merge" % (read,))

    # Name the output after a file that is actually merged. Indexing
    # samples.keys() fails on Python 3 and on single-file directories.
    target = newName(inputs[0]) + "_" + read + "_" + sys.argv[1] + ".fastq.gz"

    # Quote every path so spaces or shell metacharacters cannot break the
    # command; names made of safe characters are left unchanged by quote().
    return "cat " + " ".join(quote(name) for name in inputs) + " > " + quote(target)
def rename(samples):
    """Build the shell command that renames the file(s) of one read.

    The read tag is taken from ``sys.argv[3]`` and the action name from
    ``sys.argv[1]``. The destination is named
    ``<sample>_<read>_<action>.fastq.gz``, where ``<sample>`` comes from
    ``newName`` applied to the first selected file.

    Args:
        samples: dict mapping file name to read tag (see ``sampleDict``).

    Returns:
        An ``mv ... <destination>`` command string; it is not executed here.

    Raises:
        ValueError: If no file in ``samples`` carries the requested read tag.
    """
    try:
        from shlex import quote  # Python 3
    except ImportError:
        from pipes import quote  # Python 2

    read = sys.argv[3]
    sources = sorted(str(name) for name in findKey(samples, read))
    if not sources:
        raise ValueError("no files tagged %r to rename" % (read,))

    # Name the destination after a file that is actually moved. Indexing
    # samples.keys() fails on Python 3 and on single-file directories.
    target = newName(sources[0]) + "_" + read + "_" + sys.argv[1] + ".fastq.gz"

    # NOTE(review): when several files share the read tag, all of them are
    # passed to one `mv`, which then requires the destination to be a
    # directory. Confirm rename is only used with one file per read.
    return "mv " + " ".join(quote(name) for name in sources) + " " + quote(target)
def main():
    """Run the requested action on every sub-directory of the top folder.

    Reads ``sys.argv[1]`` (action: ``merge`` or ``rename``), ``sys.argv[2]``
    (top-level folder) and ``sys.argv[3]`` (read tag). For each
    sub-directory that holds at least one file with that read tag, the
    matching command is built and passed to ``os.system``.

    Exits with a usage message when arguments are missing or the action is
    unknown.
    """
    builders = {"merge": merge, "rename": rename}
    if len(sys.argv) < 4 or sys.argv[1] not in builders:
        sys.exit("usage: %s {merge|rename} <top-level folder> <read>"
                 % (sys.argv[0],))

    action, top, read = sys.argv[1:4]
    build_command = builders[action]

    for subdir in subdirectories(top):
        # os.path.join works whether or not `top` ends with a separator;
        # plain concatenation glued the two names together without one.
        samples = sampleDict(os.path.join(top, subdir))
        if read not in samples.values():
            # Nothing to do here; building a command without input files
            # would fail or leave `cat` waiting on stdin.
            continue
        # NOTE(review): the commands use bare file names as returned by
        # os.listdir, so the shell resolves them against the current
        # working directory, not the sub-directory. Confirm that is
        # intended.
        os.system(build_command(samples))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment