Skip to content

Instantly share code, notes, and snippets.

@agaszmurlo
Last active May 20, 2019 13:49
Show Gist options
  • Save agaszmurlo/ae0dc1d629bd34fbb95ddef6d8a88aab to your computer and use it in GitHub Desktop.
Save agaszmurlo/ae0dc1d629bd34fbb95ddef6d8a88aab to your computer and use it in GitHub Desktop.
# Start an interactive spark-shell on YARN (client deploy mode) with the
# bdg-sequila package resolved from the two Nexus repositories listed below.
cd /data/local/opt/spark-2.4.0-bin-hadoop2.7/bin
./spark-shell -v \
  --master=yarn \
  --deploy-mode=client \
  --num-executors=60 \
  --executor-memory=4g \
  --driver-memory=12g \
  --conf spark.sql.catalogImplementation=in-memory \
  --conf spark.jars.ivy=/data/local/cache/ivy2/repository \
  --conf spark.hadoop.yarn.timeline-service.enabled=false \
  --repositories http://zsibio.ii.pw.edu.pl/nexus/repository/maven-releases/,http://zsibio.ii.pw.edu.pl/nexus/repository/maven-snapshots/ \
  --packages org.biodatageeks:bdg-sequila_2.11:0.5.5-spark-2.4.2-SNAPSHOT
// Session bootstrap for the coverage run: imports, quieter logging, a
// SequilaSession wrapped around the shell's `spark`, BAM reader settings,
// and the `qreads` table backed by the BAM data source.
import org.apache.spark.sql.SequilaSession
import org.biodatageeks.utils.{SequilaRegister, UDFRegister,BDGInternalParams}

sc.setLogLevel("WARN")

val ss = SequilaSession(spark)
SequilaRegister.register(ss)

// BAM reader switches, applied in the order listed.
val bamReaderConf = Seq(
  "spark.biodatageeks.bam.useGKLInflate" -> "true",
  "spark.biodatageeks.bam.useSparkBAM" -> "false"
)
bamReaderConf.foreach { case (key, value) => ss.sqlContext.setConf(key, value) }

// DDL for the reads table; the path glob picks up every matching BAM file.
val createReadsTable = """
CREATE TABLE IF NOT EXISTS qreads
USING org.biodatageeks.datasources.BAM.BAMDataSource
OPTIONS(path '/igap/dev/rel5-guppy*.bam')"""
ss.sql(createReadsTable)
// Block-level coverage ('blocks') for sample 'rel5-guppy-0' of table qreads.
// NOTE(review): the path below is presumably an alternative local BAM input — confirm.
// /data/granges/nanopore/guppy.bam
val blockCoverageSql = "SELECT contigName, start, end, coverage FROM bdg_coverage('qreads','rel5-guppy-0', 'blocks')"
val cov = ss.sql(blockCoverageSql)
// Time a full evaluation of the query by counting its rows.
spark.time { cov.count() }
// Disabled variants that write the result as tab-delimited text instead of counting:
//spark.time{cov.write.mode("overwrite").option("delimiter", "\t").csv("/igap/dev/guppy_cov.bed")}
//spark.time{cov.coalesce(1).write.mode("overwrite").option("delimiter", "\t").csv("/data/granges/nanopore/guppy_cov.bed")}
// Per-base coverage ('bases') export with BDGInternalParams.ShowAllPositions
// switched on (presumably so every position is reported — confirm against the
// SeQuiLa docs). The flag is set on `ss`, the only session this script defines;
// the earlier duplicate call on an undefined `session` value was removed because
// it fails with "not found: value session" in spark-shell.
ss.sqlContext.setConf(BDGInternalParams.ShowAllPositions, "true")
// NOTE(review): sample id 'NA12878' differs from 'rel5-guppy-0' used for the
// block-level query on the same table — verify it matches a sample in qreads.
val cov_all = ss.sql("SELECT contigName, start, coverage FROM bdg_coverage('qreads','NA12878', 'bases')")
// coalesce(1) collapses the result to one partition so a single tab-delimited
// part file is written; any existing output at the path is overwritten.
spark.time{cov_all.coalesce(1).write.mode("overwrite").option("delimiter", "\t").csv("/data/granges/nanopore/guppy_cov_all.bed")}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment