Skip to content

Instantly share code, notes, and snippets.

@c0mpiler
Created March 22, 2023 19:34
import sys
from operator import add
from pyspark.sql import SparkSession
if __name__ == "__main__":
    """Word-count example: count word frequencies in a fixed sentence with Spark RDDs."""
    # Build (or reuse) a SparkSession; the RDD API is reached via its SparkContext.
    spark = SparkSession \
        .builder \
        .appName("PythonWordCount") \
        .getOrCreate()
    sc = spark.sparkContext

    # NOTE(review): the original used a backslash continuation *inside* the string
    # literal, which spliced "the" + "lazy" into the single token "thelazy".
    # Adjacent string literals with an explicit trailing space preserve the
    # intended word boundary.
    words = ('the quick brown fox jumps over the '
             'lazy dog the quick brown fox jumps over the lazy dog')
    seq = words.split()

    # Distribute the words, emit (word, 1) pairs, and sum counts per word.
    data = sc.parallelize(seq)
    counts = data.map(lambda word: (word, 1)) \
                 .reduceByKey(lambda a, b: a + b) \
                 .collect()

    # Fix: `dict(counts)` was a bare expression whose value was discarded when
    # run as a script (it only echoes in a REPL/notebook) — print the result.
    print(dict(counts))

    sc.stop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment