Reading HBase data with Spark

Date: 2023-03-09 18:39:51
Reading data from HBase

Spark reads an HBase table through TableInputFormat: newAPIHadoopRDD scans the table into an RDD of (row key, Result) pairs.
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}

def main(args: Array[String]): Unit = {
  // HBase connection settings: ZooKeeper quorum and the table to scan
  val hConf = HBaseConfiguration.create()
  hConf.set("hbase.zookeeper.quorum", "m6,m7,m8")
  val tableName = "t_person"
  hConf.set(TableInputFormat.INPUT_TABLE, tableName)
  // val hAdmin = new HBaseAdmin(hConf)  // only needed for admin operations, not for reading

  val conf = new SparkConf()
  conf.set("spark.master", "local")
  conf.set("spark.app.name", "spark demo")
  val sc = new SparkContext(conf)

  // Scan the table as an RDD of (row key, Result) pairs
  val rs = sc.newAPIHadoopRDD(hConf, classOf[TableInputFormat],
    classOf[ImmutableBytesWritable], classOf[Result])
  rs.foreach(x => {
    println(Bytes.toString(x._2.getRow))
    // Get a cell value by column family and qualifier
    println(Bytes.toInt(x._2.getValue(Bytes.toBytes("base_info"), Bytes.toBytes("age"))))
  })
}
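
If you want the rows as structured data rather than raw Results, the (ImmutableBytesWritable, Result) pairs can be mapped into a case class and turned into a DataFrame. A minimal sketch, continuing from the rs RDD above and assuming age is stored as a 4-byte integer; the Person case class is illustrative, not part of the original example:

import org.apache.spark.sql.SparkSession
import org.apache.hadoop.hbase.util.Bytes

// Define the case class at the top level so Spark can derive an encoder for it
case class Person(rowKey: String, age: Int)

val spark = SparkSession.builder().config(conf).getOrCreate()
import spark.implicits._

// Extract the needed cells on the executors, then build a DataFrame
val personDF = rs.map { case (_, result) =>
  Person(
    Bytes.toString(result.getRow),
    Bytes.toInt(result.getValue(Bytes.toBytes("base_info"), Bytes.toBytes("age")))
  )
}.toDF()
personDF.show()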

Saving data to HBase

The write path goes through the old MapReduce API: each record is converted into a Put, and the pair RDD is saved with saveAsHadoopDataset via a JobConf configured for TableOutputFormat.

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

def main(args: Array[String]): Unit = {
  val conf = new SparkConf()
  conf.set("spark.master", "local")
  conf.set("spark.app.name", "spark demo")
  // Create the SparkSession
  val spark = SparkSession.builder().appName("spark sql").config(conf).getOrCreate()
  // Get the SparkContext from the session
  val sc = spark.sparkContext

  val hbaseConf = HBaseConfiguration.create()
  val tableName = "t_person"
  hbaseConf.set("hbase.zookeeper.quorum", "m6,m7,m8")
  // val hbaseAdmin = new HBaseAdmin(hbaseConf)

  // saveAsHadoopDataset uses the old MapReduce API, so TableOutputFormat must come from org.apache.hadoop.hbase.mapred (not mapreduce)
  val jobConf = new JobConf(hbaseConf, this.getClass)
  jobConf.setOutputFormat(classOf[TableOutputFormat])
  jobConf.set(TableOutputFormat.OUTPUT_TABLE, tableName)
  val pairs = sc.parallelize(List(("p_0000010", "12")))

  // Turn a (rowKey, age) pair into an HBase Put
  def convert(data: (String, String)) = {
    val p = new Put(Bytes.toBytes(data._1))
    p.addColumn(Bytes.toBytes("base_info"), Bytes.toBytes("age"), Bytes.toBytes(data._2))
    (new ImmutableBytesWritable, p)
  }

  // Save the data to HBase
  pairs.map(convert).saveAsHadoopDataset(jobConf)
}
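
Newer HBase releases steer writes toward the new MapReduce API instead. A minimal sketch of the same write using saveAsNewAPIHadoopDataset with the mapreduce flavor of TableOutputFormat, assuming the same sc and t_person table; the row key p_0000011 is just a sample value:

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapreduce.Job

val hbaseConf = HBaseConfiguration.create()
hbaseConf.set("hbase.zookeeper.quorum", "m6,m7,m8")
hbaseConf.set(TableOutputFormat.OUTPUT_TABLE, "t_person")

// The new-API output format is configured through a mapreduce Job
val job = Job.getInstance(hbaseConf)
job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
job.setOutputKeyClass(classOf[ImmutableBytesWritable])
job.setOutputValueClass(classOf[Put])

sc.parallelize(List(("p_0000011", "13")))
  .map { case (rowKey, age) =>
    val put = new Put(Bytes.toBytes(rowKey))
    put.addColumn(Bytes.toBytes("base_info"), Bytes.toBytes("age"), Bytes.toBytes(age))
    (new ImmutableBytesWritable, put)
  }
  .saveAsNewAPIHadoopDataset(job.getConfiguration)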