My Second MapReduce

Date: 2023-03-09 20:57:57

When people learn Hadoop MapReduce, the first program is almost always WordCount, so here I would like to share my second MapReduce program instead. For programmers, code is often one of the best ways to communicate.

package com.zhongxin.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;
import java.math.BigDecimal;
import java.util.regex.Pattern;

/**
 * Principal and interest already received per user
 * Created by DingYS on 2017/11/21.
 */
public class UserReceiveAmount {

    public static class Map extends Mapper<LongWritable, Text, Text, Text> {
        private Text outKey = new Text();
        private Text outValue = new Text();
        private Pattern pattern = Pattern.compile(",");

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // interest
            BigDecimal interest = new BigDecimal(0);
            // principal
            BigDecimal capital = new BigDecimal(0);
            String[] splits = pattern.split(String.valueOf(value));
            String ownerType = splits[2];
            String fundsDirection = splits[6];
            String tradeType = splits[5];
            String penaltyAmount = splits[15];
            String tradeAmount = splits[7];
            String tradeShare = splits[8];
            String ownerCustomNo = splits[1];
            if ("USER".equals(ownerType) && "INCR".equals(fundsDirection) && !Pattern.matches("CURRENT_.*?", tradeType)) {
                if ("INTEREST".equals(tradeType) && ("null".equals(penaltyAmount) || "".equals(penaltyAmount) || "0.00".equals(penaltyAmount))) {
                    interest = new BigDecimal(Double.parseDouble(tradeAmount)).setScale(2, BigDecimal.ROUND_HALF_UP);
                } else {
                    interest = new BigDecimal(Double.parseDouble(tradeAmount)).subtract(new BigDecimal(Double.parseDouble(tradeShare))).setScale(2, BigDecimal.ROUND_HALF_UP);
                    capital = new BigDecimal(Double.parseDouble(tradeShare)).setScale(2, BigDecimal.ROUND_HALF_UP);
                }
                // emit one "interest,principal" record per qualifying line, keyed by customer number
                outKey.set(ownerCustomNo);
                outValue.set(String.valueOf(interest) + "," + String.valueOf(capital));
                context.write(outKey, outValue);
            }
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            Text outValue = new Text();
            BigDecimal interest = new BigDecimal(0);
            BigDecimal capital = new BigDecimal(0);
            // sum interest and principal over all records of one customer
            for (Text value : values) {
                String[] splits = value.toString().split(",");
                interest = interest.add(new BigDecimal(Double.parseDouble(splits[0]))).setScale(2, BigDecimal.ROUND_HALF_UP);
                capital = capital.add(new BigDecimal(Double.parseDouble(splits[1]))).setScale(2, BigDecimal.ROUND_HALF_UP);
            }
            outValue.set(String.valueOf(interest) + "\t" + String.valueOf(capital));
            context.write(key, outValue);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration config = new Configuration();
        Job job = Job.getInstance(config);
        job.setJobName("userReceiveAmount");
        job.setJarByClass(UserReceiveAmount.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    }
}
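
To run it, the class is packaged into a jar and submitted with hadoop jar; the jar name and HDFS paths below are only placeholders, not from the original post:

hadoop jar user-receive-amount.jar com.zhongxin.mr.UserReceiveAmount /input/trade-records /output/userReceiveAmount

Here args[0] is the input directory and args[1] is the output directory, which must not exist yet, since FileOutputFormat refuses to overwrite an existing path.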

The key to understanding this MapReduce program is: map is called once per input line, and all records with the same key end up in the same reduce call.
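
As a made-up illustration (the customer numbers and amounts are hypothetical; only the "interest,principal" value format matches the code), the mapper might emit records like these, keyed by ownerCustomNo:

C001    12.34,100.00
C001    5.66,200.00
C002    8.00,0

All C001 records go to one reduce call and all C002 records to another, so the final output on HDFS is one line per customer with total interest and total principal:

C001    18.00    300.00
C002    8.00     0.00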

The version above writes the aggregated result to HDFS; below is a version that writes it to HBase instead. Here is the code:

package com.zhongxin.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import java.io.IOException;
import java.math.BigDecimal;
import java.util.regex.Pattern;

/**
 * Principal and interest already received per user
 * Created by DingYS on 2017/11/21.
 */
public class UserReceiveAmount {

    public static class Map extends Mapper<LongWritable, Text, Text, Text> {
        private Text outKey = new Text();
        private Text outValue = new Text();
        private Pattern pattern = Pattern.compile(",");

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // interest
            BigDecimal interest = new BigDecimal(0);
            // principal
            BigDecimal capital = new BigDecimal(0);
            String[] splits = pattern.split(String.valueOf(value));
            String ownerType = splits[2];
            String fundsDirection = splits[6];
            String tradeType = splits[5];
            String penaltyAmount = splits[15];
            String tradeAmount = splits[7];
            String tradeShare = splits[8];
            String ownerCustomNo = splits[1];
            if ("USER".equals(ownerType) && "INCR".equals(fundsDirection) && !Pattern.matches("CURRENT_.*?", tradeType)) {
                if ("INTEREST".equals(tradeType) && ("null".equals(penaltyAmount) || "".equals(penaltyAmount) || "0.00".equals(penaltyAmount))) {
                    interest = new BigDecimal(Double.parseDouble(tradeAmount)).setScale(2, BigDecimal.ROUND_HALF_UP);
                } else {
                    interest = new BigDecimal(Double.parseDouble(tradeAmount)).subtract(new BigDecimal(Double.parseDouble(tradeShare))).setScale(2, BigDecimal.ROUND_HALF_UP);
                    capital = new BigDecimal(Double.parseDouble(tradeShare)).setScale(2, BigDecimal.ROUND_HALF_UP);
                }
                outKey.set(ownerCustomNo);
                outValue.set(String.valueOf(interest) + "," + String.valueOf(capital));
                context.write(outKey, outValue);
            }
        }
    }

    public static class Reduce extends TableReducer<Text, Text, ImmutableBytesWritable> {
        ImmutableBytesWritable k = new ImmutableBytesWritable();

        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            BigDecimal interest = new BigDecimal(0);
            BigDecimal capital = new BigDecimal(0);
            for (Text value : values) {
                String[] splits = value.toString().split(",");
                interest = interest.add(new BigDecimal(Double.parseDouble(splits[0]))).setScale(2, BigDecimal.ROUND_HALF_UP);
                capital = capital.add(new BigDecimal(Double.parseDouble(splits[1]))).setScale(2, BigDecimal.ROUND_HALF_UP);
            }
            // one HBase row per customer, columns info:interest and info:capital
            String family = "info";
            Put put = new Put(String.valueOf(key).getBytes());
            put.addColumn(family.getBytes(), "interest".getBytes(), String.valueOf(interest).getBytes());
            put.addColumn(family.getBytes(), "capital".getBytes(), String.valueOf(capital).getBytes());
            k.set(key.getBytes());
            context.write(k, put);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        Job job = Job.getInstance(config, "userReceiveAmount");
        job.setJarByClass(UserReceiveAmount.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.setMapperClass(Map.class);
        TableMapReduceUtil.initTableReducerJob("userReceiveAmount", Reduce.class, job);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Mutation.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
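
One practical point the listing does not show: the reducer writes into an existing HBase table, it does not create one, so the target table generally has to be created before the job runs. Assuming the table name and column family used above, something like this in the HBase shell would do:

create 'userReceiveAmount', 'info'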

Note: the rowkey needs to be a String. If you have a Text, call Text.toString() on it first (Text overrides toString()); in my tests String.valueOf() works just as well.

Put put = new Put(rowkey.getBytes());
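
A minimal sketch of that conversion, assuming rowkey is still a Text (the variable name is just for illustration):

// rowkey arrives as an org.apache.hadoop.io.Text
// Text.getBytes() returns the whole backing byte array, which can be longer than the
// valid data, so convert to String first and take the bytes of that.
Put put = new Put(rowkey.toString().getBytes());
// String.valueOf(rowkey) works too, since it ends up calling Text.toString()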