Last active
December 5, 2019 23:44
-
-
Save agaszmurlo/3ca480f3332a165ca4767ab6ff952798 to your computer and use it in GitHub Desktop.
haplotype_caller spark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Work from the sample directory; stop here if it is missing instead of
# continuing in whatever directory the shell happens to be in.
cd /data/samples/CORRIELL/mbi_cwiczenie3 || exit 1
# Create the sequence dictionary for the reference FASTA with Picard.
# The host sample directory is mounted at /data inside the container, so
# R= (input FASTA) and O= (output dictionary) are container paths.
docker run --rm -it \
  -v /data/samples/CORRIELL/mbi_cwiczenie3:/data \
  broadinstitute/picard \
  CreateSequenceDictionary \
  R=/data/chr1.fa \
  O=/data/chr1.dict
# Validate the input BAM file with Picard; MODE=SUMMARY is passed to
# request the summary report.
docker run --rm -it \
  -v /data/samples/CORRIELL/mbi_cwiczenie3:/data \
  broadinstitute/picard \
  ValidateSamFile \
  I=/data/coriell_chr1.bam \
  MODE=SUMMARY
# Add an @RG (read group) record to the BAM file with Picard.
# Reads coriell_chr1.bam and writes a new file, coriell_chr1_rg.bam, with
# the library, platform, platform unit and sample name set below.
docker run --rm -it \
  -v /data/samples/CORRIELL/mbi_cwiczenie3:/data \
  broadinstitute/picard \
  AddOrReplaceReadGroups \
  I=/data/coriell_chr1.bam \
  O=/data/coriell_chr1_rg.bam \
  RGLB=lib1 \
  RGPL=illumina \
  RGPU=12 \
  RGSM=1234
# Validate again, this time the BAM file that carries the read group.
docker run --rm -it \
  -v /data/samples/CORRIELL/mbi_cwiczenie3:/data \
  broadinstitute/picard \
  ValidateSamFile \
  I=/data/coriell_chr1_rg.bam \
  MODE=SUMMARY
# Create the index for the read-group BAM file.
# samtools runs on the host (not in Docker), so the absolute host path is
# used; the command then works regardless of the current directory.
samtools index /data/samples/CORRIELL/mbi_cwiczenie3/coriell_chr1_rg.bam
# Run the regular (non-Spark) GATK HaplotypeCaller on the read-group BAM
# and write the calls to a compressed VCF.
docker run --rm -it \
  -v /data/samples/CORRIELL/mbi_cwiczenie3:/data \
  broadinstitute/gatk \
  gatk HaplotypeCaller \
  -R /data/chr1.fa \
  -I /data/coriell_chr1_rg.bam \
  -O /data/coriell_chr1.vcf.gz
# Log line recorded from a run, kept for timing comparison:
# 13:23:12.145 INFO ProgressMeter - Traversal complete. Processed 839864 total regions in 2.1 minutes.
# Run HaplotypeCaller on Spark (local): no --spark-master is given, so no
# cluster is specified for this run.
docker run --rm -it \
  -v /data/samples/CORRIELL/mbi_cwiczenie3:/data \
  broadinstitute/gatk \
  gatk HaplotypeCallerSpark \
  -R /data/chr1.fa \
  -I /data/coriell_chr1_rg.bam \
  -O /data/coriell_chr1_spark.vcf.gz
# Log line recorded from a run, kept for timing comparison:
# [December 5, 2019 1:49:41 PM UTC] org.broadinstitute.hellbender.tools.HaplotypeCallerSpark done. Elapsed time: 2.49 minutes.
# Run HaplotypeCaller on Spark (YARN).
# The host's Hadoop configuration directory and Spark installation are
# mounted into the container at the same paths and exported under the same
# variable names, so both must be set on the host before this runs.
: "${HADOOP_CONF_DIR:?HADOOP_CONF_DIR must be set}"
: "${SPARK_HOME:?SPARK_HOME must be set}"
# NOTE(review): -O is the same path used by the local Spark run; if both
# resolve to the same file, the earlier result is overwritten — confirm
# this is intended.
docker run --rm -it \
  -v /data/samples/CORRIELL/mbi_cwiczenie3:/data \
  -v "${HADOOP_CONF_DIR}:${HADOOP_CONF_DIR}" \
  -e "HADOOP_CONF_DIR=${HADOOP_CONF_DIR}" \
  -v "${SPARK_HOME}:${SPARK_HOME}" \
  -e "SPARK_HOME=${SPARK_HOME}" \
  broadinstitute/gatk \
  gatk HaplotypeCallerSpark \
  --spark-runner SPARK \
  --spark-master yarn \
  --conf "spark.executor.memory=2g" \
  --conf "spark.driver.memory=2g" \
  --conf "spark.executor.instances=20" \
  --conf "spark.hadoop.yarn.resourcemanager.address=192.168.157.201:8032" \
  -R /data/chr1.fa \
  -I /data/coriell_chr1_rg.bam \
  -O /data/coriell_chr1_spark.vcf.gz
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment