#!/bin/bash
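# Each block below regenerates CheckpointingTest.scala with a different checkpoint variant,
# runs `sbt testOnCluster` for 5 rounds, archives the test logs, Spark event logs and storage
# reports under ~/Desktop/TestReport/NEW_DATA, then clears both checkpoint directories.
# Block 1: RDD word count, sparktest checkpoint, Java serializer, SHA-256 hash.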
echo "rdd-WordCount-sparktest-sha256-java-checkpoint-with-cache-in-one-test" | |
testname="rdd-WordCount-sparktest-sha256-java-checkpoint-with-cache-in-one-test" | |
echo -e ''' | |
package th.ac.sut.aiyara.sparktest | |
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint} | |
import com.bloomberg.sparkflow | |
import org.scalatest.FunSuite | |
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc} | |
/** | |
* Created by Bhuridech Sudsee. | |
*/ | |
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{ | |
// //Checkpoint directory | |
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/") | |
test("Test spark MapReduce wordcount job#1") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#2") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#3") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#4") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#5") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#6") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#7") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#8") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#9") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#10") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
} | |
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala | |
for round in {1..5};do | |
time for testno in {1..1};do | |
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno | |
sbt testOnCluster -Dserializer=java -Dhash=sha256 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \ | |
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname | |
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/. | |
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/. | |
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
rm -rf /mnt/ceph/sparktestCheckpoint/* | |
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done | |
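# Block 2: RDD word count, sparktest checkpoint, Kryo serializer, SHA-256 hash.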
echo "rdd-WordCount-sparktest-sha256-kryo-checkpoint-with-cache-in-one-test" | |
testname="rdd-WordCount-sparktest-sha256-kryo-checkpoint-with-cache-in-one-test" | |
echo -e ''' | |
package th.ac.sut.aiyara.sparktest | |
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint} | |
import com.bloomberg.sparkflow | |
import org.scalatest.FunSuite | |
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc} | |
/** | |
* Created by Bhuridech Sudsee. | |
*/ | |
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{ | |
// //Checkpoint directory | |
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/") | |
test("Test spark MapReduce wordcount job#1") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#2") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#3") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#4") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#5") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#6") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#7") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#8") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#9") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#10") { | |
val textFile = sc.textFile("/mnt/ceph/thwikipedia.data") | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupBy(_.toLowerCase) | |
.map(w => (w._1, w._2.size)).cache() | |
counts.sparktestCheckpoint() | |
assert(counts.count() === 12670265) | |
} | |
} | |
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala | |
for round in {1..5};do | |
time for testno in {1..1};do | |
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno | |
sbt testOnCluster -Dserializer=kryo -Dhash=sha256 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \ | |
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname | |
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/. | |
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/. | |
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
rm -rf /mnt/ceph/sparktestCheckpoint/* | |
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done | |
#!/bin/bash
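# Block 3: Dataset word count using Spark's original Dataset.checkpoint().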
echo "dataset-WordCount-original-checkpoint-with-cache-in-one-test" | |
testname="dataset-WordCount-original-checkpoint-with-cache-in-one-test" | |
echo -e ''' | |
package th.ac.sut.aiyara.sparktest | |
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint} | |
import com.bloomberg.sparkflow | |
import org.scalatest.FunSuite | |
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc} | |
/** | |
* Created by Bhuridech Sudsee. | |
*/ | |
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{ | |
// //Checkpoint directory | |
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/") | |
test("Test spark MapReduce wordcount job#1") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.checkpoint() | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#2") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.checkpoint() | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#3") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.checkpoint() | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#4") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.checkpoint() | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#5") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.checkpoint() | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#6") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.checkpoint() | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#7") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.checkpoint() | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#8") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.checkpoint() | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#9") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.checkpoint() | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#10") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.checkpoint() | |
assert(ds.count() === 12670265) | |
} | |
} | |
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala | |
for round in {1..5};do | |
time for testno in {1..1};do | |
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno | |
sbt testOnCluster -Dserializer=parquet -Dhash=md5 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \ | |
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname | |
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/. | |
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/. | |
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
rm -rf /mnt/ceph/sparktestCheckpoint/* | |
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done | |
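# Block 4: Dataset word count, sparktestCheckpoint, Parquet serializer, MD5 hash.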
echo "dataset-WordCount-sparktest-md5-parquet-checkpoint-with-cache-in-one-test" | |
testname="dataset-WordCount-sparktest-md5-parquet-checkpoint-with-cache-in-one-test" | |
echo -e ''' | |
package th.ac.sut.aiyara.sparktest | |
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint} | |
import com.bloomberg.sparkflow | |
import org.scalatest.FunSuite | |
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc} | |
/** | |
* Created by Bhuridech Sudsee. | |
*/ | |
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{ | |
// //Checkpoint directory | |
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/") | |
test("Test spark MapReduce wordcount job#1") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#2") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#3") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#4") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#5") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#6") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#7") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#8") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#9") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#10") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
} | |
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala | |
for round in {1..5};do | |
time for testno in {1..1};do | |
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno | |
sbt testOnCluster -Dserializer=parquet -Dhash=md5 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \ | |
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname | |
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/. | |
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/. | |
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
rm -rf /mnt/ceph/sparktestCheckpoint/* | |
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done | |
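# Block 5: Dataset word count, sparktestCheckpoint, Parquet serializer, SHA-1 hash.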
echo "dataset-WordCount-sparktest-sha1-parquet-checkpoint-with-cache-in-one-test" | |
testname="dataset-WordCount-sparktest-sha1-parquet-checkpoint-with-cache-in-one-test" | |
echo -e ''' | |
package th.ac.sut.aiyara.sparktest | |
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint} | |
import com.bloomberg.sparkflow | |
import org.scalatest.FunSuite | |
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc} | |
/** | |
* Created by Bhuridech Sudsee. | |
*/ | |
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{ | |
// //Checkpoint directory | |
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/") | |
test("Test spark MapReduce wordcount job#1") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#2") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#3") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#4") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#5") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#6") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#7") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#8") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#9") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#10") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
} | |
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala | |
for round in {1..5};do | |
time for testno in {1..1};do | |
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno | |
sbt testOnCluster -Dserializer=parquet -Dhash=sha1 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \ | |
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname | |
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/. | |
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/. | |
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
rm -rf /mnt/ceph/sparktestCheckpoint/* | |
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done | |
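# Block 6: Dataset word count, sparktestCheckpoint, Parquet serializer, SHA-256 hash.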
echo "dataset-WordCount-sparktest-sha256-parquet-checkpoint-with-cache-in-one-test" | |
testname="dataset-WordCount-sparktest-sha256-parquet-checkpoint-with-cache-in-one-test" | |
echo -e ''' | |
package th.ac.sut.aiyara.sparktest | |
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint} | |
import com.bloomberg.sparkflow | |
import org.scalatest.FunSuite | |
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc} | |
/** | |
* Created by Bhuridech Sudsee. | |
*/ | |
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{ | |
// //Checkpoint directory | |
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/") | |
test("Test spark MapReduce wordcount job#1") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#2") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#3") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#4") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#5") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#6") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#7") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#8") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#9") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#10") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
} | |
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala | |
for round in {1..5};do | |
time for testno in {1..1};do | |
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno | |
sbt testOnCluster -Dserializer=parquet -Dhash=sha256 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \ | |
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname | |
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/. | |
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/. | |
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
rm -rf /mnt/ceph/sparktestCheckpoint/* | |
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done | |
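# Block 7: Dataset word count, sparktestCheckpoint, Avro serializer, MD5 hash.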
echo "dataset-WordCount-sparktest-md5-avro-checkpoint-with-cache-in-one-test" | |
testname="dataset-WordCount-sparktest-md5-avro-checkpoint-with-cache-in-one-test" | |
echo -e ''' | |
package th.ac.sut.aiyara.sparktest | |
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint} | |
import com.bloomberg.sparkflow | |
import org.scalatest.FunSuite | |
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc} | |
/** | |
* Created by Bhuridech Sudsee. | |
*/ | |
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{ | |
// //Checkpoint directory | |
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/") | |
test("Test spark MapReduce wordcount job#1") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#2") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#3") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#4") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#5") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#6") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#7") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#8") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#9") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#10") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
} | |
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala | |
for round in {1..5};do | |
time for testno in {1..1};do | |
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno | |
sbt testOnCluster -Dserializer=avro -Dhash=md5 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \ | |
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname | |
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/. | |
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/. | |
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
rm -rf /mnt/ceph/sparktestCheckpoint/* | |
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done | |
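# Block 8: Dataset word count, sparktestCheckpoint, Avro serializer, SHA-1 hash.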
echo "dataset-WordCount-sparktest-sha1-avro-checkpoint-with-cache-in-one-test" | |
testname="dataset-WordCount-sparktest-sha1-avro-checkpoint-with-cache-in-one-test" | |
echo -e ''' | |
package th.ac.sut.aiyara.sparktest | |
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint} | |
import com.bloomberg.sparkflow | |
import org.scalatest.FunSuite | |
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc} | |
/** | |
* Created by Bhuridech Sudsee. | |
*/ | |
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{ | |
// //Checkpoint directory | |
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/") | |
test("Test spark MapReduce wordcount job#1") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#2") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#3") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#4") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#5") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#6") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#7") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#8") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#9") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#10") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
} | |
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala | |
for round in {1..5};do | |
time for testno in {1..1};do | |
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno | |
sbt testOnCluster -Dserializer=avro -Dhash=sha1 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \ | |
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname | |
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/. | |
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/. | |
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
rm -rf /mnt/ceph/sparktestCheckpoint/* | |
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done | |
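# Block 9: Dataset word count, sparktestCheckpoint, Avro serializer, SHA-256 hash.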
echo "dataset-WordCount-sparktest-sha256-avro-checkpoint-with-cache-in-one-test" | |
#!/bin/bash | |
testname="dataset-WordCount-sparktest-sha256-avro-checkpoint-with-cache-in-one-test" | |
echo -e ''' | |
package th.ac.sut.aiyara.sparktest | |
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint} | |
import com.bloomberg.sparkflow | |
import org.scalatest.FunSuite | |
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc} | |
/** | |
* Created by Bhuridech Sudsee. | |
*/ | |
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{ | |
// //Checkpoint directory | |
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/") | |
test("Test spark MapReduce wordcount job#1") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#2") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#3") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#4") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#5") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#6") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#7") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#8") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#9") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#10") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
} | |
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala | |
for round in {1..5};do | |
time for testno in {1..1};do | |
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno | |
sbt testOnCluster -Dserializer=avro -Dhash=sha256 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \ | |
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname | |
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/. | |
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/. | |
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
rm -rf /mnt/ceph/sparktestCheckpoint/* | |
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done | |
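# Block 10: labelled "sparkflow" checkpoint, but the generated test and sbt flags below are
# the same Avro/SHA-256 sparktestCheckpoint configuration as block 9.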
echo "dataset-WordCount-sparkflow-checkpoint-with-cache-in-one-test" | |
testname="dataset-WordCount-sparkflow-checkpoint-with-cache-in-one-test" | |
echo -e ''' | |
package th.ac.sut.aiyara.sparktest | |
import org.apache.spark.cprdd.{ImplicitDatasetCheckpoint, ImplicitRDDCheckpoint} | |
import com.bloomberg.sparkflow | |
import org.scalatest.FunSuite | |
import th.ac.sut.aiyara.sparktest.utils.{ConfigSpark, MathFunc} | |
/** | |
* Created by Bhuridech Sudsee. | |
*/ | |
class CheckpointingTest extends FunSuite with ImplicitRDDCheckpoint with ImplicitDatasetCheckpoint with ConfigSpark with MathFunc{ | |
// //Checkpoint directory | |
sc.setCheckpointDir("/mnt/ceph/sparkOriginalCheckpoint/") | |
test("Test spark MapReduce wordcount job#1") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#2") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#3") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#4") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#5") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#6") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#7") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#8") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#9") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
test("Test spark MapReduce wordcount job#10") { | |
import spark.implicits._ | |
val textFile = spark.read.text("/mnt/ceph/thwikipedia.data").as[String] | |
val counts = textFile.flatMap(_.split(" ")) | |
.filter(_ != "") | |
.groupByKey(_.toLowerCase).keys.cache() | |
val ds = counts.sparktestCheckpoint(counts.getEncoder) | |
assert(ds.count() === 12670265) | |
} | |
} | |
''' > /home/aorjoa/Repository/sparktest/src/test/scala/th/ac/sut/aiyara/sparktest/CheckpointingTest.scala | |
for round in {1..5};do | |
time for testno in {1..1};do | |
echo -e ">>>>>>>>> Test # $testno\n" | tee -a logs/testlog.$testname.$round-t.$testno | |
sbt testOnCluster -Dserializer=avro -Dhash=sha256 -Dsparktest.path="/mnt/ceph/sparktestCheckpoint/" -Dspark.master=spark://aiyara-testing-1:7077 -Dappname=$testname \ | |
2>&1 | tee -a logs/testlog.$testname.$round-t.$testno;done | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname | |
mkdir -p /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname | |
mv logs/testlog.$testname.* /home/aorjoa/Desktop/TestReport/NEW_DATA/logs/$testname/. | |
mv /mnt/ceph/sparkEventLogs/* /home/aorjoa/Desktop/TestReport/NEW_DATA/events/$testname/. | |
echo -e "===================== Storage Usage ========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparktestCheckpoint/* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
du -sh /mnt/ceph/sparkOriginalCheckpoint/*.* | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
echo -e "===================== Storage Tree =========================\n" | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparktestCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
tree /mnt/ceph/sparkOriginalCheckpoint/. | tee -a /home/aorjoa/Desktop/TestReport/NEW_DATA/storagetree/$testname/storage.$round | |
rm -rf /mnt/ceph/sparktestCheckpoint/* | |
rm -rf /mnt/ceph/sparkOriginalCheckpoint/*;done | |