Created
April 24, 2016 20:03
-
-
Save parashardhapola/efcce728601f335de50ae91411e165dc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from warnings import warn | |
import os | |
__author__ = "Parashar Dhapola" | |
__email__ = "" | |
__desc__ = """ | |
This script converts whole gene coordinates into TSS coordinates. | |
Ideally this script should work on any table downloaded from the UCSC
table browser, but it has only been tested on the "SGD track".
""" | |
def optParse():
    """Open the input and output BED files named on the command line.

    The input path is read from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``.

    Returns:
        A ``(fhi, fho)`` tuple of open file handles (input opened for
        reading, output opened for writing), or ``False`` after printing a
        message when an argument is missing or a file cannot be opened.
    """
    if len(sys.argv) < 3:
        print("USAGE:\npython %s <input_bed_file> <output_bed_file>" % __file__)
        return False
    input_bed, out_bed = sys.argv[1], sys.argv[2]

    try:
        fhi = open(input_bed)
    except (IOError, OSError):
        print("Input bed file doesn't exist")
        return False

    try:
        fho = open(out_bed, 'w')
    except (IOError, OSError):
        # Only I/O failures are reported; a bare ``except`` would also trap
        # KeyboardInterrupt/SystemExit. Release the input handle that was
        # already opened so it does not leak on this failure path.
        fhi.close()
        print("Couldn't create output file. Check permission")
        return False

    return fhi, fho
def convert_to_tss(lines):
    """Convert whole-gene BED records into 1-bp TSS BED records.

    Args:
        lines: iterable of tab-separated BED lines with at least six
            columns (chrom, start, end, name, score, strand).

    Returns:
        A list of tab-joined ``chrom, tss, tss + 1, name`` strings, one per
        distinct gene name, in order of first appearance. Later records that
        reuse a gene name are ignored with a warning.

    Raises:
        ValueError: if a data line has fewer than six columns or its strand
            column is neither "+" nor "-". The message carries the 1-based
            line number.
    """
    seen_names = set()
    records = []
    for line_no, line in enumerate(lines, 1):
        stripped = line.strip()
        # Blank lines and BED header lines carry no gene record.
        if not stripped or stripped.startswith("#"):
            continue
        if stripped.split(None, 1)[0] in ("track", "browser"):
            continue
        # Strip '\r' as well so CRLF files do not corrupt the last column.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) < 6:
            raise ValueError(
                "Expected at least 6 tab-separated columns in line %d. Exiting!"
                % line_no)
        chrom, start, end, name, strand = (
            fields[0], fields[1], fields[2], fields[3], fields[5])
        if name in seen_names:
            warn("Ignoring duplicate gene name %s" % name)
            continue
        if strand == "+":
            tss = int(start)
        elif strand == "-":
            tss = int(end) - 1  # Because end coordinate is excluded in BED format
        else:
            raise ValueError(
                "Unrecognized symbol for strand found in line %d. Exiting!"
                % line_no)
        seen_names.add(name)
        records.append("\t".join(map(str, [chrom, tss, tss + 1, name])))
    return records


if __name__ == "__main__":
    opt_ret = optParse()
    if opt_ret is False:
        # Non-zero status so shells and pipelines can detect the failure.
        sys.exit(1)
    fhi, fho = opt_ret
    try:
        # One record per line, each newline-terminated, so the file is a
        # well-formed text file that can be safely concatenated.
        fho.writelines(record + "\n" for record in convert_to_tss(fhi))
    finally:
        # Close both handles even when a malformed line aborts the run.
        fhi.close()
        fho.close()
    out_name = sys.argv[2]
    # splitext only strips the final extension, so directory parts that
    # contain dots (e.g. "./out.bed") are kept intact.
    sorted_name = os.path.splitext(out_name)[0] + "_sorted.bed"
    print("Output written to file %s" % out_name)
    print("Please run following command on bash shell to sort or use BedTools (use Excel if on Windows :( ))")
    print("sort -k1,1 -k2n,2 %s > %s" % (out_name, sorted_name))
    print("Job completed!!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment