Skip to content

Instantly share code, notes, and snippets.

@agaszmurlo
Last active May 6, 2019 11:22
Show Gist options
  • Save agaszmurlo/46e8e3abfca6baaa70fe9142b272a318 to your computer and use it in GitHub Desktop.
Save agaszmurlo/46e8e3abfca6baaa70fe9142b272a318 to your computer and use it in GitHub Desktop.
sequila sample script
////// RUN
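// Mount your local data directory at /data in the container: edit the host path in the -v option below so it points at the directory holding your BAM files (the script reads /data/NA12878.slice.bam and /data/test2.bam).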
# Start an interactive spark-shell inside the SeQuiLa image; the container is removed on exit (--rm).
# USERID/GROUPID are set in the container's environment from the invoking user's ids.
# `id -u` is used rather than $UID because $UID is a bash/zsh variable that other shells
# (e.g. plain sh/dash) do not define, which would pass an empty USERID.
# The host side of -v is machine-specific: change it to your own data directory.
docker run -it --rm \
-e USERID="$(id -u)" -e GROUPID="$(id -g)" \
-v /Users/aga/workplace/data/slice/:/data \
biodatageeks/bdg-sequila:0.5.5-spark-2.4.2-SNAPSHOT \
spark-shell --driver-memory=4g \
--jars /tmp/bdg-toolset/bdg-sequila-assembly-0.5.5-spark-2.4.2-SNAPSHOT.jar \
--conf spark.sql.warehouse.dir=/home/bdgeek/spark-warehouse
//////// SCRIPT
// Keep Spark's console logging at WARN so the query output stays readable.
sc.setLogLevel("WARN")
import org.apache.spark.sql.SequilaSession
import org.biodatageeks.utils.{SequilaRegister, UDFRegister,BDGInternalParams}
// Wrap the shell's SparkSession in a SequilaSession and register SeQuiLa on it.
val ss = SequilaSession(spark)
SequilaRegister.register(ss)
// SeQuiLa BAM reader flags: useGKLInflate on, useSparkBAM off.
ss.sqlContext.setConf("spark.biodatageeks.bam.useGKLInflate","true")
ss.sqlContext.setConf("spark.biodatageeks.bam.useSparkBAM","false")
// All tables below live in the `dna` database.
ss.sql("CREATE DATABASE IF NOT EXISTS dna")
ss.sql("USE dna")
// Registers the BAM file at `path` as table `table` (if it is not registered yet) and
// prints the first `rows` rows of bdg_coverage(table, sample, 'blocks').
//   table  - name of the table to create in the current database
//   path   - location of the BAM file as seen from inside the container
//   sample - second argument of bdg_coverage; in this script it is the BAM file name
//            without the .bam extension (presumably the sample id - confirm in SeQuiLa docs)
//   rows   - number of result rows to print (defaults to 5)
// IF NOT EXISTS makes the script safe to re-run against a catalog that already holds the
// table. NOTE: an existing table is reused as-is, so drop it first if its path has changed.
def showCoverage(table: String, path: String, sample: String, rows: Int = 5): Unit = {
  ss.sql(
    s"""
       |CREATE TABLE IF NOT EXISTS ${table}
       |USING org.biodatageeks.datasources.BAM.BAMDataSource
       |OPTIONS(path "${path}")
       |
    """.stripMargin)
  ss.sql(s"SELECT * FROM bdg_coverage('${table}','${sample}', 'blocks')").show(rows)
}
// Coverage for the NA12878 slice.
val bamPath = "/data/NA12878.slice.bam"
val tableNameBAM = "reads"
showCoverage(tableNameBAM, bamPath, "NA12878.slice")
// Coverage for the second test file.
val testPath = "/data/test2.bam"
val testTable = "reads_test"
showCoverage(testTable, testPath, "test2")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment