|
| 1 | +package com.sparkTutorial.pairRdd.join; |
| 2 | + |
| 3 | +import org.apache.spark.SparkConf; |
| 4 | +import org.apache.spark.api.java.JavaPairRDD; |
| 5 | +import org.apache.spark.api.java.JavaSparkContext; |
| 6 | +import org.apache.spark.api.java.Optional; |
| 7 | +import scala.Tuple2; |
| 8 | + |
| 9 | +import java.util.Arrays; |
| 10 | + |
| 11 | +public class JoinOperations { |
| 12 | + |
| 13 | + public static void main(String[] args) throws Exception { |
| 14 | + |
| 15 | + SparkConf conf = new SparkConf().setAppName("JoinOperations").setMaster("local[1]"); |
| 16 | + |
| 17 | + JavaSparkContext sc = new JavaSparkContext(conf); |
| 18 | + |
| 19 | + JavaPairRDD<String, Integer> ages = sc.parallelizePairs(Arrays.asList(new Tuple2<>("Tom", 29), |
| 20 | + new Tuple2<>("John", 22))); |
| 21 | + |
| 22 | + JavaPairRDD<String, String> addresses = sc.parallelizePairs(Arrays.asList(new Tuple2<>("James", "USA"), |
| 23 | + new Tuple2<>("John", "UK"))); |
| 24 | + |
| 25 | + JavaPairRDD<String, Tuple2<Integer, String>> join = ages.join(addresses); |
| 26 | + |
| 27 | + join.saveAsTextFile("out/age_address_join.text"); |
| 28 | + |
| 29 | + JavaPairRDD<String, Tuple2<Integer, Optional<String>>> leftOuterJoin = ages.leftOuterJoin(addresses); |
| 30 | + |
| 31 | + leftOuterJoin.saveAsTextFile("out/age_address_left_out_join.text"); |
| 32 | + |
| 33 | + JavaPairRDD<String, Tuple2<Optional<Integer>, String>> rightOuterJoin = ages.rightOuterJoin(addresses); |
| 34 | + |
| 35 | + rightOuterJoin.saveAsTextFile("out/age_address_right_out_join.text"); |
| 36 | + |
| 37 | + JavaPairRDD<String, Tuple2<Optional<Integer>, Optional<String>>> fullOuterJoin = ages.fullOuterJoin(addresses); |
| 38 | + |
| 39 | + fullOuterJoin.saveAsTextFile("out/age_address_full_out_join.text"); |
| 40 | + |
| 41 | + |
| 42 | + } |
| 43 | +} |
0 commit comments