Created
June 9, 2013 20:49
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
from ruffus import * | |
import subprocess | |
import sqlite3 | |
parser = cmdline.get_argparse(description='Midlands-1 analysis pipeline') | |
parser.add_argument("-s", "--sqlite_db", type=str, help="Name and path of SQLite3 database") | |
parser.add_argument("-c", "--sql_command", type=str, help="SQL command to return rows") | |
options = parser.parse_args() | |
# optional logger which can be passed to ruffus tasks | |
logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose) | |
#_____________________________________________________________________________________ | |
# pipelined functions go here | |
#_____________________________________________________________________________________ | |
def cmd_exists(cmd): | |
try: | |
ret = subprocess.call([cmd], | |
stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
return True | |
except OSError, e: | |
return 0 | |
binaries = dict((b, b) for b in ['bwa', 'samtools']) | |
missing_files = [binary_file for binary_file in binaries if not cmd_exists(binary_file)] | |
if missing_files: | |
raise SystemExit("%s not on path" % (", ".join(missing_files))) | |
def read_run_details_sqlite3(fn, cmd): | |
conn = sqlite3.connect(fn) | |
conn.row_factory=sqlite3.Row | |
c = conn.cursor() | |
c.execute(cmd) | |
rows = c.fetchall() | |
return rows | |
runs = read_run_details_sqlite3(options.sqlite_db, options.sql_command) | |
print "%d runs selected " % (len(runs),) | |
input_prefix = '/mnt/phatso/nick/alnQC' | |
usefulhash = {} | |
def make_input_files(outfile): | |
files = [] | |
for run in runs: | |
files.append(( | |
('%s/%s/%s_1_trimmed.fastq.gz' % (input_prefix, run['sequencing_plate'], run['label']), | |
'%s/%s/%s_2_trimmed.fastq.gz' % (input_prefix, run['sequencing_plate'], run['label'])), | |
outfile % run['label'], | |
run['label'] | |
)) | |
usefulhash[outfile % run['label']] = run['label'] | |
return files | |
initial_input_files = make_input_files('%s.sorted.bam') | |
denovo_assembly_files = make_input_files('%s.assembly') | |
@files(initial_input_files) | |
def align_to_les(infiles, outfile, label): | |
os.system("""%s mem -t16 refs/NC_011770.fna %s %s | | |
%s view -bS - | | |
%s sort -f -@ 8 - %s""" % (binaries['bwa'], | |
infiles[0], infiles[1], | |
binaries['samtools'], | |
binaries['samtools'], | |
outfile)) | |
@merge(align_to_les, "variants.vcf") | |
#@merge(['CF40-P31.sorted.bam', 'CF60-P44.sorted.bam'], "variants.vcf") | |
def generate_vcf(infiles, outfile): | |
fh = open("labels.txt", "w") | |
for fn in infiles: | |
print >>fh, usefulhash[fn] | |
fh.close() | |
cmd = "%s mpileup -q 50 -f refs/NC_011770.fna %s \ | |
| java -jar bin/VarScan.jar mpileup2snp \ | |
--min-coverage 2 --min-var-freq 0.9 --p-value 0.005 \ | |
--output-vcf --vcf-sample-list labels.txt > %s" % (binaries['samtools'], | |
" ".join(infiles), outfile) | |
os.system(cmd) | |
@transform(align_to_les, suffix(".sorted.bam"), r"\1_assembly/contigs.fa") | |
def velvet_assembly(infile, outfile): | |
label = usefulhash[infile] | |
os.system("bin/velvet_1.2.10/velveth %s_assembly 63 -bam %s" % (label, infile)) | |
os.system("bin/velvet_1.2.10/velvetg %s_assembly -exp_cov auto -cov_cutoff auto" % (label)) | |
@transform(velvet_assembly, suffix("contigs.fa"), "mlst.json") | |
def mlst_profile(infile, outfile): | |
os.system("curl -o %s -F file=@%s -F name=Pseudomonas_aeruginosa http://192.168.1.31:8001/mlst/?format=json" % (outfile, infile)) | |
cmdline.run(options) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment