spark actions 算子

时间:2024-04-10 08:36:57
package action;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2; import java.util.Arrays;
import java.util.List;
import java.util.Map; /**
* TODO
*
* @ClassName: actions
* @author: DingH
* @since: 2019/4/2 10:53
*/
public class actions {
public static void main(String[] args) {
SparkConf conf = new SparkConf().setAppName("actions").setMaster("local");
JavaSparkContext sc = new JavaSparkContext(conf); JavaRDD<Integer> parallelize = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5)); JavaPairRDD<String, Integer> rdd = sc.parallelizePairs(Arrays.asList(
new Tuple2<String, Integer>("aaaa", 111),
new Tuple2<String, Integer>("aaaa", 111),
new Tuple2<String, Integer>("bbbb", 222),
new Tuple2<String, Integer>("bbbb", 222),
new Tuple2<String, Integer>("bbbb", 222),
new Tuple2<String, Integer>("ccc", 333)
)); JavaPairRDD<String, Integer> rdd1 = rdd.reduceByKey(new Function2<Integer, Integer, Integer>() {
public Integer call(Integer integer, Integer integer2) throws Exception {
return integer + integer2;
}
}); Tuple2<String, Integer> reduce = rdd1.reduce(new Function2<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>>() {
public Tuple2<String, Integer> call(Tuple2<String, Integer> stringIntegerTuple2, Tuple2<String, Integer> stringIntegerTuple22) throws Exception {
Tuple2<String, Integer> stringIntegerTuple21 = new Tuple2<String, Integer>(stringIntegerTuple2._1 + stringIntegerTuple22._1, stringIntegerTuple2._2 + stringIntegerTuple22._2); return stringIntegerTuple21;
}
}); System.out.println(reduce); List<Tuple2<String, Integer>> collect = rdd1.collect();
for (Tuple2<String,Integer> tt:collect){
System.out.println(tt);
} long count = rdd1.count(); Tuple2<String, Integer> first = rdd1.first(); List<Tuple2<String, Integer>> take = rdd1.take(4); List<Tuple2<String, Integer>> tuple2s = rdd1.takeSample(false, 3); rdd1.saveAsTextFile(""); Map<String, Object> stringObjectMap = rdd1.countByKey(); rdd1.foreach(new VoidFunction<Tuple2<String, Integer>>() {
public void call(Tuple2<String, Integer> stringIntegerTuple2) throws Exception {
System.out.println(11);
}
}); sc.stop();
}
}