Skip to content

Commit f18ce7e

Browse files
author
James Lee
committed
add group by value example
1 parent d0c88c3 commit f18ce7e

2 files changed

Lines changed: 53 additions & 0 deletions

File tree

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package com.sparkTutorial.pairRdd.groupbykey;
2+
3+
public class AirportsProblem {
4+
5+
public static void main(String[] args) throws Exception {
6+
7+
/* TODO: Create a Spark program to read the airport data from in/airports.text, output the the list of the names of the airports located in each country.
8+
9+
Each row of the input file contains the following columns:
10+
11+
Airport ID, Name of airport, Main city served by airport, Country where airport is located, IATA/FAA code, ICAO Code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format
12+
13+
Sample output:
14+
15+
"Canada", ["Bagotville", "Montreal", "Coronation", ...]
16+
"Norway" : ["Vigra", "Andenes", "Alta", "Bomoen", "Bronnoy",..]
17+
"Papua New Guinea", ["Goroka", "Madang", ...]
18+
...
19+
*/
20+
}
21+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package com.sparkTutorial.pairRdd.groupbykey;
2+
3+
import org.apache.spark.SparkConf;
4+
import org.apache.spark.api.java.JavaPairRDD;
5+
import org.apache.spark.api.java.JavaRDD;
6+
import org.apache.spark.api.java.JavaSparkContext;
7+
import org.apache.spark.api.java.function.PairFunction;
8+
import scala.Tuple2;
9+
10+
public class AirportsSolution {
11+
12+
public static void main(String[] args) throws Exception {
13+
14+
SparkConf conf = new SparkConf().setAppName("airports").setMaster("local[*]");
15+
16+
JavaSparkContext sc = new JavaSparkContext(conf);
17+
18+
JavaRDD<String> lines = sc.textFile("in/airports.text");
19+
20+
JavaPairRDD<String, String> CountryAndAirportNameAndPair = lines.mapToPair((PairFunction<String, String, String>) airport -> new Tuple2<>(airport.split(",")[3], airport.split(",")[1]));
21+
22+
JavaPairRDD<String, Iterable<String>> AirportsByCountry = CountryAndAirportNameAndPair.groupByKey();
23+
24+
for (Tuple2<String, Iterable<String>> airports : AirportsByCountry.collect()) {
25+
System.out.print(airports._1() + " : [");
26+
for (String s : airports._2()) {
27+
System.out.print(s + ", ");
28+
}
29+
System.out.println("]");
30+
}
31+
}
32+
}

0 commit comments

Comments
 (0)