Skip to content

Commit 4941d26

Browse files
author
James Lee
committed
GroupByKeyVsReduceByKey
1 parent f18ce7e commit 4941d26

1 file changed

Lines changed: 36 additions & 0 deletions

File tree

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package com.sparkTutorial.pairRdd.groupbykey;
2+
3+
import org.apache.spark.SparkConf;
4+
import org.apache.spark.api.java.JavaPairRDD;
5+
import org.apache.spark.api.java.JavaSparkContext;
6+
import scala.Tuple2;
7+
8+
import java.util.Arrays;
9+
import java.util.List;
10+
11+
public class GroupByKeyVsReduceByKey {
12+
13+
public static void main(String[] args) throws Exception {
14+
15+
SparkConf conf = new SparkConf().setAppName("GroupByKeyVsReduceByKey").setMaster("local[*]");
16+
JavaSparkContext sc = new JavaSparkContext(conf);
17+
18+
List<String> words = Arrays.asList("one", "two", "two", "three", "three", "three");
19+
20+
JavaPairRDD<String, Integer> wordsPairRdd = sc.parallelize(words).mapToPair(word -> new Tuple2<>(word, 1));
21+
22+
List<Tuple2<String, Integer>> wordCountsWithReduce = wordsPairRdd.reduceByKey((x, y) -> x + y).collect();
23+
24+
List<Tuple2<String, Integer>> wordCountsWithGroup = wordsPairRdd.groupByKey()
25+
.mapToPair(word -> new Tuple2<>(word._1(), getSum(word._2()))).collect();
26+
}
27+
28+
private static int getSum(Iterable<Integer> integers) {
29+
int sum = 0;
30+
for (Integer integer : integers) {
31+
sum = + integer;
32+
}
33+
return sum;
34+
}
35+
}
36+

0 commit comments

Comments
 (0)