以 IntWritable 为例,介绍定制 Writable 的步骤:
//继承 WritableComparable接口(继承了writable接口和comparable接口)
public class IntWritable implements WritableComparable<IntWritable> { //定义普通java类型的成员变量
private int value; //成员变量的set方法
public void set(int value) { this.value = value; }
//成员变量的get方法
public int get() { return value; } //无参构造函数,为MR框架反射机制所调用
public IntWritable() {}
//有参构造函数
public IntWritable(int value) { set(value); } //反序列化方法
public void readFields(DataInput in) throws IOException {
value = in.readInt();
}
//序列化方法
public void write(DataOutput out) throws IOException {
out.writeInt(value);
} //覆写equals()方法
public boolean equals(Object o) {
if (!(o instanceof IntWritable))
return false;
IntWritable other = (IntWritable)o;
return this.value == other.value;
} //覆写hashCode()方法
public int hashCode() {
return value;
} //覆写toString()方法
public String toString() {
return Integer.toString(value);
} //覆写 comparable接口 中的compareTo()方法【默认升序】
public int compareTo(IntWritable o) {
int thisValue = this.value;
int thatValue = o.value;
return (thisValue<thatValue ? - : (thisValue==thatValue ? : ));
} //1. 定义内部类Comparator【比较器】继承自WritableComparator类
public static class Comparator extends WritableComparator { //2. 不可缺少的无参构造函数,反射机制调用
public Comparator() {
super(IntWritable.class);
} //3. 覆写 字节流层面的比较排序
public int compare(byte[] b1, int s1, int l1,
byte[] b2, int s2, int l2) {
//返回 字符数组b1 的编码值
int thisValue = readInt(b1, s1);
int thatValue = readInt(b2, s2);
return (thisValue<thatValue ? - : (thisValue==thatValue ? : ));
}
}
//4. 向WritableComparator类注册定制的writable类【Haoop自动调用上述的比较器】
static {
WritableComparator.define(IntWritable.class, new Comparator());
}
}
注意点:
- 在定制Writable类中实现字节流层面的比较时,一般不直接实现RawComparator接口,而是继承它的实现类WritableComparator,并覆写 public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) 方法。这是因为WritableComparator为我们提供了一些有用的工具方法,比如从字节数组中读取int、long和vlong等值。
- 当然编写完compare()方法之后,不要忘了通过 WritableComparator.define() 为定制的Writable类注册所编写的比较器。
- 对于代码中的 readInt()工具方法的具体实现:
/** Parse an integer from a byte array. */
public static int readInt(byte[] bytes, int start) {
return (((bytes[start ] & 0xff) << ) +
((bytes[start+] & 0xff) << ) +
((bytes[start+] & 0xff) << ) +
((bytes[start+] & 0xff)));
}