HBase过滤器的使用

时间:2023-03-09 03:58:45
HBase过滤器的使用

一、常用过滤器:

  1、数据准备:  

Rowkey:001    Family:Quilfifier address    value: 昆明市西山区
Rowkey:001 Family:Quilfifier age value: 23
Rowkey:001 Family:Quilfifier name value: 小明
Rowkey:001 Family:Quilfifier personType value: 布控人员,涉恐人员,线索人员
Rowkey:001 Family:Quilfifier zjhm value: 620302199822332832
Rowkey:002 Family:Quilfifier address value: 昆明市西山区福海路
Rowkey:002 Family:Quilfifier age value: 33
Rowkey:002 Family:Quilfifier name value: 小李
Rowkey:002 Family:Quilfifier personType value: 重点人员,涉恐人员,线索人员
Rowkey:002 Family:Quilfifier zjhm value: 620302199822332442
Rowkey:003 Family:Quilfifier address value: 昆明市西山区福海路
Rowkey:003 Family:Quilfifier age value: 34
Rowkey:003 Family:Quilfifier name value: 小王
Rowkey:003 Family:Quilfifier personType value: 重点人员,涉恐人员,在控人员
Rowkey:003 Family:Quilfifier zjhm value: 620302192398432442
Rowkey:004 Family:Quilfifier address value: 昆明市滇池路
Rowkey:004 Family:Quilfifier age value: 45
Rowkey:004 Family:Quilfifier name value: 小花
Rowkey:004 Family:Quilfifier personType value: 涉恐人员,线索人员
Rowkey:004 Family:Quilfifier zjhm value: 643020304050403436
Rowkey:005 Family:Quilfifier address value: 云南省西双版纳
Rowkey:005 Family:Quilfifier age value: 60
Rowkey:005 Family:Quilfifier name value: 小马
Rowkey:005 Family:Quilfifier personType value: ,涉案人员,涉恐人员,线索人员
Rowkey:005 Family:Quilfifier zjhm value: 643020302938413436
Rowkey:006 Family:Quilfifier address value: 北京市朝阳区
Rowkey:006 Family:Quilfifier age value: 66
Rowkey:006 Family:Quilfifier name value: 大壮
Rowkey:006 Family:Quilfifier personType value: 良民
Rowkey:006 Family:Quilfifier zjhm value: 673747322344384456

  2、过滤器的使用:

  

package HBase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes; import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Walk-through of the built-in HBase scan filters, run against a table whose
 * rows have an {@code info} column family with the qualifiers
 * {@code address}, {@code age}, {@code name}, {@code personType} and
 * {@code zjhm}.
 *
 * <p>The connection and admin handles are kept in static fields (as in the
 * original example) and are released by {@link #close()}.
 */
public class Operator implements AutoCloseable {
    public static Admin admin = null;
    public static Connection conn = null;

    /** Suffix that turns a row key into the smallest key strictly greater than it. */
    private static final byte[] NEXT_ROW_POSTFIX = new byte[] {0x00};

    /**
     * Opens a new HBase connection using the ZooKeeper quorum configured for
     * this example.
     *
     * @return a freshly created connection; the caller is responsible for closing it
     * @throws IOException if the connection cannot be created
     */
    public Connection getConn() throws IOException {
        return openConnection();
    }

    /**
     * Connects to HBase and obtains an {@link Admin} handle, storing both in
     * the static fields.
     *
     * @throws IllegalStateException if the connection or the admin handle
     *     cannot be obtained (the underlying {@link IOException} is the cause)
     */
    public Operator() {
        try {
            // Use this class's own connection factory so the example is
            // self-contained and does not depend on an external helper class.
            conn = openConnection();
            admin = conn.getAdmin();
        } catch (IOException e) {
            // Fail fast instead of silently continuing with null handles,
            // which would only surface later as a NullPointerException.
            throw new IllegalStateException("Unable to connect to HBase", e);
        }
    }

    /**
     * Runs the filter demonstration against the {@code person} table.
     *
     * @param args ignored
     * @throws Exception if the scan fails
     */
    public static void main(String[] args) throws Exception {
        try (Operator operator = new Operator()) {
            operator.filter("person");
            // To try row-based paging instead: operator.pageFilter("person");
        }
    }

    /**
     * Releases the admin handle and the connection held in the static fields.
     *
     * @throws IOException if closing either resource fails
     */
    @Override
    public void close() throws IOException {
        try {
            if (admin != null) {
                admin.close();
            }
        } finally {
            if (conn != null) {
                conn.close();
            }
        }
    }

    /**
     * Builds one instance of each common filter and scans {@code tableName}
     * with one of them, printing every returned cell.
     *
     * <p>Only the filter passed to {@code scan.setFilter(...)} takes effect;
     * the others are constructed purely as usage examples. Swap the argument
     * to try a different one.
     *
     * <p>{@code SingleColumnValueFilter} and
     * {@code SingleColumnValueExcludeFilter} select rows by the value of one
     * column:
     * <ul>
     *   <li>if the row does not contain the column, both filters return the
     *       whole row (unless {@code setFilterIfMissing(true)} is used);</li>
     *   <li>if the column exists but does not satisfy the condition, none of
     *       the row's columns are returned;</li>
     *   <li>if the column exists and satisfies the condition, the former
     *       returns every column and the latter returns every column except
     *       the tested one.</li>
     * </ul>
     *
     * @param tableName name of the table to scan
     * @throws Exception if the table cannot be opened or the scan fails
     */
    public void filter(String tableName) throws Exception {
        byte[] family = Bytes.toBytes("info");
        byte[] personType = Bytes.toBytes("personType");

        // SingleColumnValueFilter + BinaryComparator: the value must match the
        // byte array exactly; the whole matching row is returned.
        Filter filter = new SingleColumnValueFilter(family, personType,
                CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("良民")));
        // SingleColumnValueFilter + BinaryPrefixComparator: only the value's
        // prefix is compared; the whole matching row is returned, not a single column.
        Filter filter0 = new SingleColumnValueFilter(family, personType,
                CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator(Bytes.toBytes("重点")));
        // SingleColumnValueFilter + RegexStringComparator: the value must match
        // the regular expression; the whole matching row is returned.
        SingleColumnValueFilter filter1 = new SingleColumnValueFilter(family, personType,
                CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*重点人员.*"));
        // SingleColumnValueFilter + SubstringComparator: case-insensitive
        // substring test; the whole matching row is returned.
        Filter filter2 = new SingleColumnValueFilter(family, personType,
                CompareFilter.CompareOp.EQUAL, new SubstringComparator("线索人员"));
        // SingleColumnValueExcludeFilter: returns the matching rows but leaves
        // out the column that was tested.
        Filter filter3 = new SingleColumnValueExcludeFilter(family, personType,
                CompareFilter.CompareOp.EQUAL, new SubstringComparator("线索人员"));
        // RandomRowFilter: includes each row with the given probability.
        Filter filter4 = new RandomRowFilter((float) 0.5);
        // PrefixFilter: returns every row whose row key starts with the prefix.
        Filter filter5 = new PrefixFilter(Bytes.toBytes("00"));
        // ValueFilter: compares the value of every cell in the table and
        // returns the individual matching cells, not whole rows.
        Filter filter6 = new ValueFilter(CompareFilter.CompareOp.NOT_EQUAL,
                new BinaryComparator(Bytes.toBytes("23")));
        // FamilyFilter: selects cells by comparing their column family name.
        Filter filter7 = new FamilyFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("info")));
        // KeyOnlyFilter: returns every cell's key with the value stripped.
        Filter filter8 = new KeyOnlyFilter();
        // ColumnPrefixFilter: keeps only the cells whose qualifier starts with
        // the prefix, for every row.
        Filter filter9 = new ColumnPrefixFilter(Bytes.toBytes("ag"));
        // FirstKeyOnlyFilter: returns only the first cell of each row and
        // skips the rest of the row once it has been found.
        Filter filter10 = new FirstKeyOnlyFilter();
        // InclusiveStopFilter: returns rows up to and including the stop row
        // (005). Plain startRow/stopRow ranges are closed-open instead.
        Filter filter11 = new InclusiveStopFilter(Bytes.toBytes("005"));
        // ColumnCountGetFilter: returns at most N columns of a row. Once the
        // quota is reached it ends the whole scan, so it is meant for Get
        // rather than Scan.
        Filter filter12 = new ColumnCountGetFilter(6);
        // SkipFilter: wrapper that drops the entire row as soon as one of its
        // cells fails the wrapped filter.
        Filter filter13 = new SkipFilter(filter6);
        // WhileMatchFilter: wrapper that stops the scan at the first cell that
        // fails the wrapped filter; everything returned before that matched.
        Filter filter14 = new WhileMatchFilter(filter6);
        // QualifierFilter: selects cells by comparing their qualifier.
        Filter filter15 = new QualifierFilter(CompareFilter.CompareOp.EQUAL,
                new BinaryComparator(Bytes.toBytes("age")));
        // MultipleColumnPrefixFilter: like ColumnPrefixFilter, but accepts
        // several qualifier prefixes.
        byte[][] prefixes = new byte[][] {Bytes.toBytes("ag"), Bytes.toBytes("na")};
        Filter filter16 = new MultipleColumnPrefixFilter(prefixes);
        // ColumnRangeFilter: efficient intra-row scan over qualifiers (which
        // are stored in sorted order); here the range [name, zjhm] is inclusive
        // at both ends.
        boolean minColumnInclusive = true;
        boolean maxColumnInclusive = true;
        Filter filter17 = new ColumnRangeFilter(Bytes.toBytes("name"), minColumnInclusive,
                Bytes.toBytes("zjhm"), maxColumnInclusive);
        // DependentColumnFilter: for rows that contain the reference column,
        // keeps the cells whose timestamp matches that column's timestamp.
        Filter filter18 = new DependentColumnFilter(family, Bytes.toBytes("age"));
        // RandomRowFilter again, with the probability held in a variable.
        float chance = 0.6f;
        Filter filter19 = new RandomRowFilter(chance);
        // ColumnPaginationFilter: pages over the columns of each row; useful
        // when rows have a very large number of columns.
        int limit = 3;
        int columnOffset = 0;
        Filter filter20 = new ColumnPaginationFilter(limit, columnOffset);

        // Combining filters: a row must pass every filter in the list.
        List<Filter> filters = new ArrayList<>();
        filters.add(filter1);
        filters.add(filter2);
        FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);
        // false: rows that lack the tested column are still let through.
        filter1.setFilterIfMissing(false);

        Scan scan = new Scan();
        scan.setFilter(filter20);

        // try-with-resources closes the scanner and the table even when the
        // scan throws part-way through.
        try (Table table = conn.getTable(TableName.valueOf(tableName));
             ResultScanner scanner = table.getScanner(scan)) {
            for (Result row : scanner) {
                printRow(row);
            }
        }
    }

    /**
     * Pages through {@code tableName} by row using {@code PageFilter},
     * printing every row and finally the total row count.
     *
     * <p>Each page is a new scan that starts just after the last row of the
     * previous page; the loop ends when a scan returns no rows.
     *
     * @param tableName name of the table to scan
     * @throws IOException if the table cannot be opened or a scan fails
     */
    public void pageFilter(String tableName) throws IOException {
        long pageSize = 2;
        int totalRowsCount = 0;
        byte[] lastRow = null;
        PageFilter pageFilter = new PageFilter(pageSize);

        try (Table table = conn.getTable(TableName.valueOf(tableName))) {
            while (true) {
                Scan scan = new Scan();
                scan.setFilter(pageFilter);
                if (lastRow != null) {
                    // Appending a single zero byte yields the smallest key
                    // that sorts after lastRow, so the previous page's last
                    // row is excluded and no following row can be skipped.
                    byte[] startRow = Bytes.add(lastRow, NEXT_ROW_POSTFIX);
                    scan.setStartRow(startRow);
                    // toStringBinary renders the trailing zero byte visibly.
                    System.out.println("start row :" + Bytes.toStringBinary(startRow));
                }

                int localRowsCount = 0;
                try (ResultScanner scanner = table.getScanner(scan)) {
                    for (Result result : scanner) {
                        System.out.println(localRowsCount++ + ":" + result);
                        totalRowsCount++;
                        lastRow = result.getRow();
                    }
                }
                if (localRowsCount == 0) {
                    break;
                }
            }
        }
        System.out.println("total rows is :" + totalRowsCount);
    }

    /** Creates a connection to the cluster whose ZooKeeper runs on {@code master:2181}. */
    private static Connection openConnection() throws IOException {
        Configuration hbaseConf = HBaseConfiguration.create();
        // For a multi-node ensemble list every member instead, e.g.
        // "master:2181,slave1:2181,slave2:2181".
        hbaseConf.set("hbase.zookeeper.quorum", "master:2181");
        return ConnectionFactory.createConnection(hbaseConf);
    }

    /** Prints one line per cell of the row: row key, qualifier and value. */
    private static void printRow(Result row) {
        String rowKey = Bytes.toString(row.getRow());
        for (Cell cell : row.rawCells()) {
            System.out.println(
                    "Rowkey:" + rowKey + "\t"
                            + "Family:Quilfifier " + Bytes.toString(CellUtil.cloneQualifier(cell)) + "\t"
                            + "value: " + Bytes.toString(CellUtil.cloneValue(cell)));
        }
    }
}

  3、自定义过滤器

    --后面再补