Reading HBase data with Spark

Date: 2023-03-09 18:39:51
Reading data from HBase

Spark reads an HBase table through TableInputFormat: newAPIHadoopRDD scans the table into an RDD of (row key, Result) pairs.
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}

def main(args: Array[String]): Unit = {
  // HBase connection settings: ZooKeeper quorum and the table to scan
  val hConf = HBaseConfiguration.create()
  hConf.set("hbase.zookeeper.quorum", "m6,m7,m8")
  val tableName = "t_person"
  hConf.set(TableInputFormat.INPUT_TABLE, tableName)
  // val hAdmin = new HBaseAdmin(hConf)  // only needed for admin operations, not for reading

  val conf = new SparkConf()
  conf.set("spark.master", "local")
  conf.set("spark.app.name", "spark demo")
  val sc = new SparkContext(conf)

  // Scan the table as an RDD of (row key, Result) pairs
  val rs = sc.newAPIHadoopRDD(hConf, classOf[TableInputFormat],
    classOf[ImmutableBytesWritable], classOf[Result])
  rs.foreach(x => {
    println(Bytes.toString(x._2.getRow))
    // Get a cell value by column family and qualifier
    println(Bytes.toInt(x._2.getValue(Bytes.toBytes("base_info"), Bytes.toBytes("age"))))
  })
}
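
If you want the rows as structured data rather than raw Results, the (ImmutableBytesWritable, Result) pairs can be mapped into a case class and turned into a DataFrame. A minimal sketch, continuing from the rs RDD above and assuming age is stored as a 4-byte integer; the Person case class is illustrative, not part of the original example:

import org.apache.spark.sql.SparkSession
import org.apache.hadoop.hbase.util.Bytes

// Define the case class at the top level so Spark can derive an encoder for it
case class Person(rowKey: String, age: Int)

val spark = SparkSession.builder().config(conf).getOrCreate()
import spark.implicits._

// Extract the needed cells on the executors, then build a DataFrame
val personDF = rs.map { case (_, result) =>
  Person(
    Bytes.toString(result.getRow),
    Bytes.toInt(result.getValue(Bytes.toBytes("base_info"), Bytes.toBytes("age")))
  )
}.toDF()
personDF.show()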

Saving data to HBase

The write path goes through the old MapReduce API: each record is converted into a Put, and the pair RDD is saved with saveAsHadoopDataset via a JobConf configured for TableOutputFormat.

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

def main(args: Array[String]): Unit = {
  val conf = new SparkConf()
  conf.set("spark.master", "local")
  conf.set("spark.app.name", "spark demo")
  // Create the SparkSession
  val spark = SparkSession.builder().appName("spark sql").config(conf).getOrCreate()
  // Get the SparkContext from the session
  val sc = spark.sparkContext

  val hbaseConf = HBaseConfiguration.create()
  val tableName = "t_person"
  hbaseConf.set("hbase.zookeeper.quorum", "m6,m7,m8")
  // val hbaseAdmin = new HBaseAdmin(hbaseConf)

  // saveAsHadoopDataset uses the old MapReduce API, so TableOutputFormat must come from org.apache.hadoop.hbase.mapred (not mapreduce)
  val jobConf = new JobConf(hbaseConf, this.getClass)
  jobConf.setOutputFormat(classOf[TableOutputFormat])
  jobConf.set(TableOutputFormat.OUTPUT_TABLE, tableName)
  val pairs = sc.parallelize(List(("p_0000010", "12")))

  // Turn a (rowKey, age) pair into an HBase Put
  def convert(data: (String, String)) = {
    val p = new Put(Bytes.toBytes(data._1))
    p.addColumn(Bytes.toBytes("base_info"), Bytes.toBytes("age"), Bytes.toBytes(data._2))
    (new ImmutableBytesWritable, p)
  }

  // Save the data to HBase
  pairs.map(convert).saveAsHadoopDataset(jobConf)
}
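
Newer HBase releases steer writes toward the new MapReduce API instead. A minimal sketch of the same write using saveAsNewAPIHadoopDataset with the mapreduce flavor of TableOutputFormat, assuming the same sc and t_person table; the row key p_0000011 is just a sample value:

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapreduce.Job

val hbaseConf = HBaseConfiguration.create()
hbaseConf.set("hbase.zookeeper.quorum", "m6,m7,m8")
hbaseConf.set(TableOutputFormat.OUTPUT_TABLE, "t_person")

// The new-API output format is configured through a mapreduce Job
val job = Job.getInstance(hbaseConf)
job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
job.setOutputKeyClass(classOf[ImmutableBytesWritable])
job.setOutputValueClass(classOf[Put])

sc.parallelize(List(("p_0000011", "13")))
  .map { case (rowKey, age) =>
    val put = new Put(Bytes.toBytes(rowKey))
    put.addColumn(Bytes.toBytes("base_info"), Bytes.toBytes("age"), Bytes.toBytes(age))
    (new ImmutableBytesWritable, put)
  }
  .saveAsNewAPIHadoopDataset(job.getConfiguration)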