编写MapReduce程序示例——求平均成绩

时间:2022-12-22 18:24:36

输入文件:

由于不识别中文,输入文件中的学生姓名暂时使用拼音表示。

jiangxin 94
wangziwen 78
yangzi 83
wangkai 89
jiangxin 80
wangziwen 84
liutao 90
liutao 82
jiangxin 76
wangkai 77
wangkai 91
yangzi 86
jiangxin 88

每一行为一个学生姓名及一科成绩,如果有多门学科,则该学生存在多行数据。

编写MapReduce程序示例——求平均成绩

编写MapReduce程序示例——求平均成绩

编写代码:

package zmy.examples; 

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;


public class ZMY_AverageScore {
//定义log变量
public static final Log LOG =
LogFactory.getLog(FileInputFormat.class);

public static class TokenizerMapper
extends Mapper<Object, Text, Text, IntWritable>{
private Text word = new Text();

public void map(Object key, Text value, Context context
) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
String ScoreOne = itr.nextToken();
int ScoreInt = Integer.parseInt(ScoreOne);
context.write(word,new IntWritable(ScoreInt));
}
}
}


public static class IntSumReducer
extends Reducer<Text,IntWritable,Text,IntWritable> {

private IntWritable result = new IntWritable();

public void reduce(Text key, Iterable<IntWritable> values,
Context context
) throws IOException, InterruptedException {
int sum = 0;
int scoreNum = 0;
for (IntWritable val : values) {
scoreNum ++;
sum += val.get();
}
int averageScore = sum/scoreNum;
result.set(averageScore);
context.write(key, result);
}
}

public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
//检查运行命令
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: average score <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "score average");

job.setJarByClass(ZMY_AverageScore.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}

运行结果:

编写MapReduce程序示例——求平均成绩