#!/bin/bash
echo "Please ensure this script runs on a Spark master node."
sudo apt update
sudo apt install -y openjdk-11-jdk
sudo apt install -y scala
echo "deb https://dl.bintray.com/sbt/debian /" | sudo tee -a /etc/apt/sources.list.d/sbt.list
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823
sudo apt-get update
sudo apt-get install -y sbt
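# Optional sanity check (an added step, not in the original gist): confirm that
# the JDK and Scala installed above are actually on the PATH before building.
java -version
scala -version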
mkdir -p apps/count
cd apps/count
cat << 'EOF' > build.sbt
name := "Counter"
version := "1.0"
scalaVersion := "2.12.8"
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.4.1"
EOF
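# Note on build.sbt above: the "%%" operator makes sbt append the Scala binary
# version to the artifact name, so this dependency resolves to spark-sql_2.12,
# matching scalaVersion 2.12.8.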
mkdir -p src/main/scala
# Quote the heredoc delimiter so bash does not expand $counts inside the Scala source.
cat << 'EOF' > src/main/scala/count.scala
import org.apache.spark.sql.SparkSession

object Counter {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .appName("Counter")
      .config("spark.speculation", "false")
      .config("spark.hadoop.fs.s3a.path.style.access", "true")
      .config("spark.hadoop.fs.s3a.connection.maximum", 500)
      .getOrCreate()
    // S3A options can also be set after building the session
    spark.sparkContext.hadoopConfiguration.set("fs.s3a.endpoint", "http://minio:9000")
    spark.sparkContext.hadoopConfiguration.set("fs.s3a.access.key", "minio")
    spark.sparkContext.hadoopConfiguration.set("fs.s3a.secret.key", "minio123")
    spark.sparkContext.hadoopConfiguration.set("fs.s3a.path.style.access", "true")
    // read the CSV as a Dataset[String] and count its lines
    val textFile = spark.read.textFile("s3a://data-set/nyc_taxi_data.csv")
    val counts = textFile.count()
    println(s"Total taxi rides: $counts")
    spark.stop()
  }
}
EOF
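# A hedged alternative to hardcoding keys in the source (assumes the standard
# AWS SDK v1 credential providers shipped with hadoop-aws): have S3A read the
# AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables instead, e.g.
#   .config("spark.hadoop.fs.s3a.aws.credentials.provider",
#           "com.amazonaws.auth.EnvironmentVariableCredentialsProvider")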
echo "please change the address of minio in src/main/scala/count.scala to point to the correct instance of minio in your datacenter and recompile using 'sbt package'" | |
## compile the program | |
sbt package | |
export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64/ | |
export HADOOP_HOME=${HOME}/hadoop-3.1.2 | |
export PATH=${HADOOP_HOME}/bin:$PATH | |
export SPARK_DIST_CLASSPATH=$(hadoop classpath) | |
export SPARK_HOME=${HOME}/spark-2.4.1-bin-without-hadoop | |
export PATH=${SPARK_HOME}/bin:$PATH | |
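# The job expects s3a://data-set/nyc_taxi_data.csv to exist. A minimal sketch of
# seeding it with the MinIO client (assumes "mc" is installed; the alias name
# "myminio" and the local CSV path are illustrative):
#   mc config host add myminio http://minio:9000 minio minio123
#   mc mb myminio/data-set
#   mc cp nyc_taxi_data.csv myminio/data-set/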
spark-submit --deploy-mode=client --master spark://$(hostname):7077 --packages org.apache.hadoop:hadoop-aws:3.1.2 target/scala-2.12/counter_2.12-1.0.jar
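# If the submit succeeds, the driver output should include the line printed by
# the application, e.g. "Total taxi rides: <row count>".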