forked from jleetutorial/python-spark-tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathUkMakerSpaces.py
More file actions
22 lines (15 loc) · 888 Bytes
/
UkMakerSpaces.py
File metadata and controls
22 lines (15 loc) · 888 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from pyspark.sql import SparkSession, functions as fs
if __name__ == "__main__":
    # Script entry point: load the makerspace and postcode CSVs, join each
    # makerspace to a postcode-table row by postcode prefix, and print the
    # number of makerspaces per region.
    session = SparkSession.builder.appName("UkMakerSpaces").master("local[*]").getOrCreate()

    try:
        makerSpace = session.read.option("header", "true") \
            .csv("in/uk-makerspaces-identifiable-data.csv")

        # Append a single space to every postcode-table entry so that the
        # startswith() join below only matches when the entry is followed by a
        # space in the makerspace postcode (a bare prefix match would also hit
        # longer entries that merely share the same leading characters).
        # NOTE(review): concat_ws skips null inputs, so a null PostCode would
        # become " " here -- confirm the postcode CSV has no empty entries.
        postCode = session.read.option("header", "true").csv("in/uk-postcode.csv") \
            .withColumn("PostCode", fs.concat_ws("", fs.col("PostCode"), fs.lit(" ")))

        # show() without arguments prints at most the first 20 rows.
        print("=== Print 20 records of makerspace table ===")
        makerSpace.select("Name of makerspace", "Postcode").show()

        print("=== Print 20 records of postcode table ===")
        postCode.select("PostCode", "Region").show()

        # Left outer join keeps every makerspace row; rows with no matching
        # postcode entry end up with a null Region. The postcode-table column
        # is referenced as "PostCode", exactly as it is named above, so the
        # lookup does not rely on Spark's case-insensitive column resolution.
        joined = makerSpace \
            .join(postCode, makerSpace["Postcode"].startswith(postCode["PostCode"]), "left_outer")

        print("=== Group by Region ===")
        # Print up to 200 region rows instead of the default 20.
        joined.groupBy("Region").count().show(200)
    finally:
        # Always release the local Spark context, even if a read or job fails.
        session.stop()