Hadoop学习之路(二十五)MapReduce的API使用(二)

时间:2023-03-10 05:19:44
Hadoop学习之路(二十五)MapReduce的API使用(二)

学生成绩---增强版

数据信息

 computer,huangxiaoming,85,86,41,75,93,42,85
computer,xuzheng,54,52,86,91,42
computer,huangbo,85,42,96,38
english,zhaobenshan,54,52,86,91,42,85,75
english,liuyifei,85,41,75,21,85,96,14
algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85
algorithm,huangdatou,48,58,67,86,15,33,85
algorithm,huangzitao,85,86,41,75,93,42,85,75

数据解释

数据字段个数不固定:
第一个是课程名称,总共四个课程,computer,math,english,algorithm,
第二个是学生姓名,后面是每次考试的分数

统计需求

1、统计每门课程的参考人数和课程平均分

2、统计每门课程参考学生的平均分,并且按课程存入不同的结果文件,要求一门课程一个结果文件,并且按平均分从高到低排序,分数保留一位小数

3、求出每门课程参考学生成绩最高的学生的信息:课程,姓名和平均分

第一题

MRAvgScore1.java

 /**
* 需求:统计每门课程的参考人数和课程平均分
* */
public class MRAvgScore1 { public static void main(String[] args) throws Exception { Configuration conf1 = new Configuration();
Configuration conf2 = new Configuration(); Job job1 = Job.getInstance(conf1);
Job job2 = Job.getInstance(conf2); job1.setJarByClass(MRAvgScore1.class);
job1.setMapperClass(AvgScoreMapper1.class);
//job.setReducerClass(MFReducer.class); job1.setOutputKeyClass(Text.class);
job1.setOutputValueClass(DoubleWritable.class); Path inputPath1 = new Path("D:\\MR\\hw\\work3\\input");
Path outputPath1 = new Path("D:\\MR\\hw\\work3\\output_hw1_1"); FileInputFormat.setInputPaths(job1, inputPath1);
FileOutputFormat.setOutputPath(job1, outputPath1); job2.setMapperClass(AvgScoreMapper2.class);
job2.setReducerClass(AvgScoreReducer2.class); job2.setOutputKeyClass(Text.class);
job2.setOutputValueClass(DoubleWritable.class); Path inputPath2 = new Path("D:\\MR\\hw\\work3\\output_hw1_1");
Path outputPath2 = new Path("D:\\MR\\hw\\work3\\output_hw1_end"); FileInputFormat.setInputPaths(job2, inputPath2);
FileOutputFormat.setOutputPath(job2, outputPath2); JobControl control = new JobControl("AvgScore"); ControlledJob aJob = new ControlledJob(job1.getConfiguration());
ControlledJob bJob = new ControlledJob(job2.getConfiguration()); bJob.addDependingJob(aJob); control.addJob(aJob);
control.addJob(bJob); Thread thread = new Thread(control);
thread.start(); while(!control.allFinished()) {
thread.sleep(1000);
}
System.exit(0); } /**
* 数据类型:computer,huangxiaoming,85,86,41,75,93,42,85
*
* 需求:统计每门课程的参考人数和课程平均分
*
* 分析:以课程名称+姓名作为key,以平均分数作为value
* */
public static class AvgScoreMapper1 extends Mapper<LongWritable, Text, Text, DoubleWritable>{ @Override
protected void map(LongWritable key, Text value,Context context)
throws IOException, InterruptedException { String[] splits = value.toString().split(",");
//拼接成要输出的key
String outKey = splits[0]+"\t"+splits[1];
int length = splits.length;
int sum = 0;
//求出成绩的总和
for(int i=2;i<length;i++) {
sum += Integer.parseInt(splits[i]);
}
//求出平均分
double outValue = sum / (length - 2); context.write(new Text(outKey), new DoubleWritable(outValue)); } } /**
* 对第一次MapReduce输出的结果进一步计算,第一步输出结果样式为
* math huangjiaju 82.0
* math huanglei 74.0
* math huangxiaoming 83.0
* math liujialing 72.0
* math liutao 56.0
* math wangbaoqiang 72.0
* math xuzheng 69.0
*
* 需求:统计每门课程的参考人数和课程平均分
* 分析:以课程名称作为key,以分数作为value进行 输出
*
* */
public static class AvgScoreMapper2 extends Mapper<LongWritable, Text, Text, DoubleWritable>{ @Override
protected void map(LongWritable key, Text value,Context context)
throws IOException, InterruptedException { String[] splits = value.toString().split("\t");
String outKey = splits[0];
String outValue = splits[2]; context.write(new Text(outKey), new DoubleWritable(Double.parseDouble(outValue)));
} } /**
* 针对同一门课程,对values进行遍历计数,看看有多少人参加了考试,并计算出平均成绩
* */
public static class AvgScoreReducer2 extends Reducer<Text, DoubleWritable, Text, Text>{ @Override
protected void reduce(Text key, Iterable<DoubleWritable> values,
Context context) throws IOException, InterruptedException { int count = 0;
double sum = 0;
for(DoubleWritable value : values) {
count++;
sum += value.get();
} double avg = sum / count;
String outValue = count + "\t" + avg;
context.write(key, new Text(outValue));
} } }

第二题

MRAvgScore2.java

 public class MRAvgScore2 {

     public static void main(String[] args) throws Exception {

         Configuration conf = new Configuration();

         Job job = Job.getInstance(conf);

         job.setJarByClass(MRAvgScore2.class);
job.setMapperClass(ScoreMapper3.class);
job.setReducerClass(ScoreReducer3.class); job.setOutputKeyClass(StudentBean.class);
job.setOutputValueClass(NullWritable.class); job.setPartitionerClass(CoursePartitioner.class);
job.setNumReduceTasks(4); Path inputPath = new Path("D:\\MR\\hw\\work3\\output_hw1_1");
Path outputPath = new Path("D:\\MR\\hw\\work3\\output_hw2_1"); FileInputFormat.setInputPaths(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
boolean isDone = job.waitForCompletion(true);
System.exit(isDone ? 0 : 1);
} public static class ScoreMapper3 extends Mapper<LongWritable, Text, StudentBean, NullWritable>{ @Override
protected void map(LongWritable key, Text value,Context context)
throws IOException, InterruptedException { String[] splits = value.toString().split("\t"); double score = Double.parseDouble(splits[2]);
DecimalFormat df = new DecimalFormat("#.0");
df.format(score); StudentBean student = new StudentBean(splits[0],splits[1],score); context.write(student, NullWritable.get()); } } public static class ScoreReducer3 extends Reducer<StudentBean, NullWritable, StudentBean, NullWritable>{ @Override
protected void reduce(StudentBean key, Iterable<NullWritable> values,Context context)
throws IOException, InterruptedException { for(NullWritable nvl : values){
context.write(key, nvl);
} }
}
}

StudentBean.java

 public class StudentBean implements WritableComparable<StudentBean>{
private String course;
private String name;
private double avgScore; public String getCourse() {
return course;
}
public void setCourse(String course) {
this.course = course;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public double getavgScore() {
return avgScore;
}
public void setavgScore(double avgScore) {
this.avgScore = avgScore;
}
public StudentBean(String course, String name, double avgScore) {
super();
this.course = course;
this.name = name;
this.avgScore = avgScore;
}
public StudentBean() {
super();
} @Override
public String toString() {
return course + "\t" + name + "\t" + avgScore;
}
@Override
public void readFields(DataInput in) throws IOException {
course = in.readUTF();
name = in.readUTF();
avgScore = in.readDouble();
}
@Override
public void write(DataOutput out) throws IOException {
out.writeUTF(course);
out.writeUTF(name);
out.writeDouble(avgScore);
}
@Override
public int compareTo(StudentBean stu) {
double diffent = this.avgScore - stu.avgScore;
if(diffent == 0) {
return 0;
}else {
return diffent > 0 ? -1 : 1;
}
} }

第三题

MRScore3.java

 public class MRScore3 {

     public static void main(String[] args) throws Exception {

         Configuration conf1 = new Configuration();
Configuration conf2 = new Configuration(); Job job1 = Job.getInstance(conf1);
Job job2 = Job.getInstance(conf2); job1.setJarByClass(MRScore3.class);
job1.setMapperClass(MRMapper3_1.class);
//job.setReducerClass(ScoreReducer3.class); job1.setMapOutputKeyClass(IntWritable.class);
job1.setMapOutputValueClass(StudentBean.class);
job1.setOutputKeyClass(IntWritable.class);
job1.setOutputValueClass(StudentBean.class); job1.setPartitionerClass(CoursePartitioner2.class); job1.setNumReduceTasks(4); Path inputPath = new Path("D:\\MR\\hw\\work3\\input");
Path outputPath = new Path("D:\\MR\\hw\\work3\\output_hw3_1"); FileInputFormat.setInputPaths(job1, inputPath);
FileOutputFormat.setOutputPath(job1, outputPath); job2.setMapperClass(MRMapper3_2.class);
job2.setReducerClass(MRReducer3_2.class); job2.setMapOutputKeyClass(IntWritable.class);
job2.setMapOutputValueClass(StudentBean.class);
job2.setOutputKeyClass(StudentBean.class);
job2.setOutputValueClass(NullWritable.class); Path inputPath2 = new Path("D:\\MR\\hw\\work3\\output_hw3_1");
Path outputPath2 = new Path("D:\\MR\\hw\\work3\\output_hw3_end"); FileInputFormat.setInputPaths(job2, inputPath2);
FileOutputFormat.setOutputPath(job2, outputPath2); JobControl control = new JobControl("Score3"); ControlledJob aJob = new ControlledJob(job1.getConfiguration());
ControlledJob bJob = new ControlledJob(job2.getConfiguration()); bJob.addDependingJob(aJob); control.addJob(aJob);
control.addJob(bJob); Thread thread = new Thread(control);
thread.start(); while(!control.allFinished()) {
thread.sleep(1000);
}
System.exit(0); } public static class MRMapper3_1 extends Mapper<LongWritable, Text, IntWritable, StudentBean>{ StudentBean outKey = new StudentBean();
IntWritable outValue = new IntWritable();
List<String> scoreList = new ArrayList<>(); protected void map(LongWritable key, Text value, Context context) throws java.io.IOException ,InterruptedException { scoreList.clear();
String[] splits = value.toString().split(",");
long sum = 0; for(int i=2;i<splits.length;i++) {
scoreList.add(splits[i]);
sum += Long.parseLong(splits[i]);
} Collections.sort(scoreList);
outValue.set(Integer.parseInt(scoreList.get(scoreList.size()-1))); double avg = sum * 1.0/(splits.length-2);
outKey.setCourse(splits[0]);
outKey.setName(splits[1]);
outKey.setavgScore(avg); context.write(outValue, outKey); };
} public static class MRMapper3_2 extends Mapper<LongWritable, Text,IntWritable, StudentBean >{ StudentBean outValue = new StudentBean();
IntWritable outKey = new IntWritable(); protected void map(LongWritable key, Text value, Context context) throws java.io.IOException ,InterruptedException { String[] splits = value.toString().split("\t");
outKey.set(Integer.parseInt(splits[0])); outValue.setCourse(splits[1]);
outValue.setName(splits[2]);
outValue.setavgScore(Double.parseDouble(splits[3])); context.write(outKey, outValue); };
} public static class MRReducer3_2 extends Reducer<IntWritable, StudentBean, StudentBean, NullWritable>{ StudentBean outKey = new StudentBean(); @Override
protected void reduce(IntWritable key, Iterable<StudentBean> values,Context context)
throws IOException, InterruptedException { int length = values.toString().length(); for(StudentBean value : values) {
outKey = value;
} context.write(outKey, NullWritable.get()); }
} }