Last active
December 5, 2019 23:45
-
-
Save agaszmurlo/71f5fd8236b6103171148f788ff80a63 to your computer and use it in GitHub Desktop.
seqtender alignment setup
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Spark installation used for the seqtender alignment runs.
export SPARK_HOME=/data/local/opt/spark-2.4.3-bin-hadoop2.7

# Remove previously cached copies of the seqtender SNAPSHOT from the Ivy
# directory that spark.jars.ivy points to below.
rm -rf /data/local/cache/ivy2/repository/cache/org.biodatageeks/bdg-seqtender_2.11/
# -f: a missing jar (nothing cached yet) is not an error.
rm -f /data/local/cache/ivy2/repository/jars/org.biodatageeks_bdg-seqtender_2.11-0.2-SNAPSHOT.jar

## master local, defaultFS = HDFS
# The launcher path is relative: run this from the Spark installation directory.
./bin/spark-shell -v \
  --master local \
  --driver-memory 2g \
  --conf "spark.sql.catalogImplementation=in-memory" \
  --packages org.biodatageeks:bdg-seqtender_2.11:0.2-SNAPSHOT \
  --conf spark.hadoop.yarn.timeline-service.enabled=false \
  --conf spark.jars.ivy=/data/local/cache/ivy2/repository/ \
  --conf spark.driver.extraJavaOptions=-Dhdp.version=3.1.0.0-78 \
  --conf spark.yarn.am.extraJavaOptions=-Dhdp.version=3.1.0.0-78 \
  --conf spark.hadoop.metastore.catalog.default=hive \
  --repositories http://repo.hortonworks.com/content/repositories/releases/,http://zsibio.ii.pw.edu.pl/nexus/repository/maven-snapshots/
// Paste into the spark-shell session started with the seqtender package.
// Pipes the E. coli test reads through SeqTenderAlignment and counts the
// records of the resulting RDD.
import org.biodatageeks.alignment.{CommandBuilder, SeqTenderAlignment}

val reads = "hdfs:///data/seqtender_test/alignment/e_coli_1000_interleaved.ifq" // hdfs location
val index = "/data/samples/ecoli/indexes/bowtie2_index/e_coli" // local storage available to all nodes for index file
val cb = new CommandBuilder(reads, index, "bowtie2", interleaved = true)
val rdd = SeqTenderAlignment.pipeReads(cb, spark) // `spark` is the session provided by spark-shell
rdd.count()
// Paste into the spark-shell session started with the seqtender package.
// Same flow as for the E. coli sample, but on the human chr1 reads; the count
// is wrapped in spark.time so the elapsed time is printed.
import org.biodatageeks.alignment.{CommandBuilder, SeqTenderAlignment}

val reads = "hdfs:///edugen/fq/coriell_chr1.fq" // hdfs location
val index = "/data/samples/chr1_human/indexes/chr1_human" // local storage available to all nodes for index file
val cb = new CommandBuilder(reads, index, "bowtie2", interleaved = true)
val rdd = SeqTenderAlignment.pipeReads(cb, spark) // `spark` is the session provided by spark-shell
spark.time { rdd.count() }
# master yarn
# Starts spark-shell against YARN in client deploy mode with 20 executors.
# "--master yarn-client" is deprecated since Spark 2.0; the supported spelling
# is "--master yarn" together with "--deploy-mode client".
# The launcher path is relative: run this from the Spark installation directory.
./bin/spark-shell -v \
  --master yarn \
  --deploy-mode client \
  --driver-memory 2g --num-executors 20 --executor-memory 2g \
  --conf "spark.sql.catalogImplementation=in-memory" \
  --packages org.biodatageeks:bdg-seqtender_2.11:0.2-SNAPSHOT \
  --conf spark.hadoop.yarn.timeline-service.enabled=false \
  --conf spark.jars.ivy=/data/local/cache/ivy2/repository/ \
  --conf spark.driver.extraJavaOptions=-Dhdp.version=3.1.0.0-78 \
  --conf spark.yarn.am.extraJavaOptions=-Dhdp.version=3.1.0.0-78 \
  --conf spark.hadoop.metastore.catalog.default=hive \
  --repositories http://repo.hortonworks.com/content/repositories/releases/,http://zsibio.ii.pw.edu.pl/nexus/repository/maven-snapshots/
# Invocation on human_chr1
# Streams the reads into bowtie2 running in a container and shows only the
# first lines of its output. The index directory is mounted at /data.
cat /data/samples/chr1_human/coriell_chr1.fq | \
  docker run --rm -i -v /data/samples/chr1_human/indexes/:/data \
  quay.io/biocontainers/bowtie2:2.3.4.3--py27h2d50403_0 \
  bowtie2 -x /data/chr1_human --interleaved - | head
# Recorded timing (presumably `time` output for the command above):
# real 0m59.639s

# Print a summary of the chr1_human index.
docker run --rm -it -v /data/samples/chr1_human/indexes/:/data \
  quay.io/biocontainers/bowtie2:2.3.4.3--py27h2d50403_0 \
  bowtie2-inspect -s /data/chr1_human
# Direct invocation on ecoli
# Streams the interleaved reads into bowtie2 running in a container. The index
# directory is mounted at /data.
# Only -i is used here: stdin is a pipe, and docker refuses to allocate a TTY
# (-t) when its input is not a terminal.
cat /data/samples/ecoli/reads/e_coli_1000_interleaved.ifq | \
  docker run --rm -i -v /data/samples/ecoli/indexes/bowtie2_index/:/data \
  quay.io/biocontainers/bowtie2:2.3.4.3--py27h2d50403_0 \
  bowtie2 -x /data/e_coli --interleaved -

# Print a summary of the e_coli index.
docker run --rm -it -v /data/samples/ecoli/indexes/bowtie2_index/:/data \
  quay.io/biocontainers/bowtie2:2.3.4.3--py27h2d50403_0 \
  bowtie2-inspect -s /data/e_coli
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment