// Demonstrates union, intersection, join and lookup on two small pair RDDs.
// Expects an already-created SparkContext named `sc` in scope (as provided by spark-shell).
// The "res:" comments record the output captured with the original snippet;
// NOTE(review): element order of results from shuffle-based operations
// (intersection, join) is presumably not guaranteed -- confirm before relying on it.

// Two (key, value) RDDs that share the keys "a" and "b".
val rdd1 = sc.parallelize(List(("a", 1), ("a", 2), ("b", 1), ("b", 3)))
val rdd2 = sc.parallelize(List(("a", 3), ("a", 4), ("b", 1), ("b", 2)))

// union: every element of rdd1 followed by every element of rdd2; duplicates are kept
// (("b", 1) appears twice in the result).
val unionRDD = rdd1.union(rdd2)
unionRDD.collect()
// res: Array((a,1), (a,2), (b,1), (b,3), (a,3), (a,4), (b,1), (b,2))

// intersection: only the (key, value) pairs present in both RDDs.
val intersectionRDD = rdd1.intersection(rdd2)
intersectionRDD.collect()
// res: Array[(String, Int)] = Array((b,1))

// join: for each key, pairs every value from rdd1 with every value from rdd2.
val joinRDD = rdd1.join(rdd2)
joinRDD.collect()
// res: Array[(String, (Int, Int))] = Array((a,(1,3)), (a,(1,4)), (a,(2,3)), (a,(2,4)), (b,(1,1)), (b,(1,2)), (b,(3,1)), (b,(3,2)))

// lookup: all values stored under the given key.
rdd1.lookup("a")
// res: Seq[Int] = WrappedArray(1, 2)

unionRDD.lookup("a")
// res: Seq[Int] = WrappedArray(1, 2, 3, 4)

joinRDD.lookup("a")
// res: Seq[(Int, Int)] = ArrayBuffer((1,3), (1,4), (2,3), (2,4))
|