Skip to content

Commit 4d7b7a4

Browse files
author
Pedro Bernardo
committed
Added rdd/count/CountExample.py
1 parent 8838805 commit 4d7b7a4

1 file changed

Lines changed: 11 additions & 0 deletions

File tree

rdd/count/CountExample.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,11 @@
1+
from pyspark import SparkContext
2+
3+
if __name__ == "__main__":
    # Example script: build an RDD from a small in-memory list of words and
    # print (1) the total number of elements and (2) how many times each
    # distinct word occurs.
    #
    # SparkContext is used as a context manager so that the context is
    # stopped when the block exits, even if one of the actions raises.
    with SparkContext("local", "count") as sc:
        inputWords = ["spark", "hadoop", "spark", "hive", "pig", "cassandra", "hadoop"]
        wordRdd = sc.parallelize(inputWords)

        # count() is an action: it triggers evaluation and returns a number
        # to the driver.
        print("Count: {}".format(wordRdd.count()))

        # countByValue() brings a word -> occurrences mapping back to the
        # driver, so it is only appropriate when the number of distinct
        # values is small enough to fit in driver memory.
        wordCountByValue = wordRdd.countByValue()
        print("CountByValue: ")
        for word, count in wordCountByValue.items():
            print("{} : {}".format(word, count))

0 commit comments

Comments
 (0)