Created
June 18, 2018 17:43
-
-
Save nick-youngblut/2df89764aa3a9fb399fb30283784c78d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
from __future__ import print_function

import argparse
import gzip
import os
import sys

from Bio import SeqIO
# Command-line interface definition.
# `desc` is the one-line summary and `epi` is the long help epilog; the epilog
# is shown verbatim because the parser uses RawTextHelpFormatter.
desc = 'Convert between quality fastq formats'
epi = """DESCRIPTION:
Convert between fastq quality formats by using biopython.
The input fastq can be gzip'ed.
The output is written to STDOUT
--Formats--
* "fastq" means Sanger style FASTQ files using PHRED scores and an ASCII offset of 33 (e.g. from the NCBI Short Read Archive and Illumina 1.8+). These can potentially hold PHRED scores from 0 to 93.
* "fastq-sanger" is an alias for "fastq".
* "fastq-solexa" means old Solexa (and also very early Illumina) style FASTQ files, using Solexa scores with an ASCII offset 64. These can hold Solexa scores from -5 to 62.
* "fastq-illumina" means newer Illumina 1.3 to 1.7 style FASTQ files, using PHRED scores but with an ASCII offset 64, allowing PHRED scores from 0 to 62.
"""
parser = argparse.ArgumentParser(description=desc, epilog=epi,
                                 formatter_class=argparse.RawTextHelpFormatter)
# Positional argument: path of the fastq file to convert.
parser.add_argument('fastq_file', metavar='fastq_file', type=str,
                    help='Input fastq file')
# Format names are handed to Biopython unchanged, so they are not restricted
# with `choices` here.
parser.add_argument('-i', '--in-format', default='fastq', type=str,
                    help='Input fastq format (default: %(default)s)')
parser.add_argument('-o', '--out-format', default='fastq-sanger', type=str,
                    help='Output fastq format (default: %(default)s)')
parser.add_argument('--version', action='version', version='0.1.0')
def main(args):
    """Read the input fastq and write every record to STDOUT in the output format.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. The attributes read here are
        ``fastq_file`` (input path), ``in_format`` and ``out_format``
        (format names passed on to Biopython).
    """
    # A '.gz' suffix selects gzip in text mode; anything else is opened as
    # plain text.
    if args.fastq_file.endswith('.gz'):
        inF = gzip.open(args.fastq_file, 'rt')
    else:
        inF = open(args.fastq_file, 'r')
    # The context manager closes the handle even if parsing or formatting
    # raises part-way through the file.
    with inF:
        for record in SeqIO.parse(inF, args.in_format):
            # record.format() returns newline-terminated text; suppress
            # print's own newline so no blank line separates the records.
            print(record.format(args.out_format), end='')
# Script entry point: parse the command line and run the conversion.
if __name__ == '__main__':
    args = parser.parse_args()
    main(args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment