Created
September 24, 2015 04:30
-
-
Save ShujiaHuang/791599a443793c991d1b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# If you adapt this script for your own use, you will need to set these two variables based on your environment. | |
# SV_DIR is the installation directory for SVToolkit - it must be an exported environment variable. | |
# SV_TMPDIR is a directory for writing temp files, which may be large if you have a large data set. | |
#export SV_DIR=`cd .. && pwd` | |
SV_DIR=/home/siyang/bin/software_pip/svtoolkit | |
SV_TMPDIR= | |
runDir= | |
bam= | |
#sy: bam without s signal in flag | |
sites= | |
genotypes= | |
#conf=/home/siyang/USER/yeweijian/project/AsmVar/Genome_STRip/test/test_genotyping/genstrip_Common_parameters.txt | |
conf=/home/siyang/USER/yeweijian/project/AsmVar/Genome_STRip/20140330Test/genstrip_OnlySplit_parameters.txt | |
REF=/project/DanishPanGenome/Public_Data/reference/human_g1k_v37_decoy.fasta | |
#REF=/home/siyang/AsmVar/GenomeDK/20130928_assembly_alignment/lastdb/human_g1k_v37_decoy.fasta | |
mask=/home/siyang/Database/Genome_STRip/ftp.broadinstitute.org/pub/svtoolkit/svmasks/human_g1k_v37.mask.100.fasta | |
ploidymaps=/home/siyang/Database/Genome_STRip/ftp.broadinstitute.org/pub/svtoolkit/ploidymaps/humgen_g1k_v37_ploidy.map | |
cnfa=/home/siyang/Database/Genome_STRip/ftp.broadinstitute.org/pub/svtoolkit/cn2masks/cn2_mask_g1k_v37.fasta | |
gender=/home/siyang/USER/yeweijian/project/AsmVar/Genome_STRip/sh/realgender.map | |
# These executables must be on your path. | |
which java > /dev/null || exit 1 | |
which Rscript > /dev/null || exit 1 | |
which samtools > /dev/null || exit 1 | |
# For SVAltAlign, you must use the version of bwa compatible with Genome STRiP. | |
export PATH=${SV_DIR}/bwa:${PATH} | |
export LD_LIBRARY_PATH=${SV_DIR}/bwa:${LD_LIBRARY_PATH} | |
mx="-Xmx60g" | |
classpath="${SV_DIR}/lib/SVToolkit.jar:${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar:${SV_DIR}/lib/gatk/Queue.jar" | |
mkdir -p ${runDir}/logs || exit 1 | |
mkdir -p ${runDir}/metadata || exit 1 | |
# Display version information. | |
java -cp ${classpath} ${mx} -jar ${SV_DIR}/lib/SVToolkit.jar | |
# Run preprocessing. | |
# For large scale use, you should use -reduceInsertSizeDistributions, but this is too slow for the installation test. | |
# The method employed by -computeGCProfiles requires a CN2 copy number mask and is currently only supported for human genomes. | |
:<<BLOCK | |
java -cp ${classpath} ${mx} \ | |
org.broadinstitute.sting.queue.QCommandLine \ | |
-S ${SV_DIR}/qscript/SVPreprocess.q \ | |
-S ${SV_DIR}/qscript/SVQScript.q \ | |
-gatk ${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar \ | |
--disableJobReport \ | |
-cp ${classpath} \ | |
-configFile $conf \ | |
-tempDir ${SV_TMPDIR} \ | |
-R ${REF} \ | |
-genomeMaskFile $mask \ | |
-ploidyMapFile $ploidymaps \ | |
-genderMapFile $gender \ | |
-copyNumberMaskFile $cnfa \ | |
-runDirectory ${runDir} \ | |
-md ${runDir}/metadata \ | |
-computeSizesInterval 20:1-63025520 \ | |
-computeGCProfiles \ | |
-jobLogDir ${runDir}/logs \ | |
-I ${bam} \ | |
-useMultiStep \ | |
-run \ | |
|| exit 1 | |
# Run alt allele alignment. | |
java -cp ${classpath} ${mx} \ | |
org.broadinstitute.sting.queue.QCommandLine \ | |
-S ${SV_DIR}/qscript/SVAltAlign.q \ | |
-S ${SV_DIR}/qscript/SVQScript.q \ | |
-gatk ${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar \ | |
--disableJobReport \ | |
-cp ${classpath} \ | |
-configFile $conf \ | |
-tempDir ${SV_TMPDIR} \ | |
-R ${REF} \ | |
-genomeMaskFile $mask \ | |
-runDirectory ${runDir} \ | |
-md ${runDir}/metadata \ | |
-jobLogDir ${runDir}/logs \ | |
-vcf ${sites} \ | |
-I ${bam} \ | |
-O ${runDir}/Altalign.bam \ | |
-run \ | |
|| exit 1 | |
BLOCK | |
# Run genotyping (including the alt allele alignments) | |
java -cp ${classpath} ${mx} \ | |
org.broadinstitute.sting.queue.QCommandLine \ | |
-S ${SV_DIR}/qscript/SVGenotyper.q \ | |
-S ${SV_DIR}/qscript/SVQScript.q \ | |
-gatk ${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar \ | |
--disableJobReport \ | |
-cp ${classpath} \ | |
-configFile $conf \ | |
-tempDir ${SV_TMPDIR} \ | |
-R ${REF} \ | |
-genomeMaskFile $mask \ | |
-genderMapFile $gender \ | |
-runDirectory ${runDir} \ | |
-md ${runDir}/metadata \ | |
-jobLogDir ${runDir}/logs \ | |
-I ${bam} \ | |
-vcf ${sites} \ | |
-O ${genotypes} \ | |
-run \ | |
|| exit 1 | |
(grep -v ^##fileDate= ${genotypes} | grep -v ^##source= | grep -v ^##reference= | diff -q - benchmark/${genotypes}) \ | |
|| { echo "Error: test results do not match benchmark data"; exit 1; } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment