Hive的HQL的基本操作

时间:2021-12-18 13:44:52

HQL的基本命令(全):


1、查看数据库

   show databases;  //查看已经存在的数据库

   describe database test;   //查看某个已经存在的数据库

 

2、创建数据库

   create database test;

   create database if not exists test;

   create database test2 location '/user/hadoop/temp';  //制定数据库创建的目录

 

3、删除数据库

   drop database if exists test1 cascade;

 

4、切换当前工作的数据库

   use test2;

 

5、查看数据库中的表

   show tables;         //查看当前工作的数据库中的表

   show tables in test3;   //查看数据库test3中的表

 

6、创建表

   create table if not exists test.student(

   name string comment 'student name',

   age int comment 'student age',

   course array<string>,

   body map<string,float>,

   address struct<street:string,city:string,state:string>

   )

   comment 'the info of student'                 //表的备注

   row format delimited fields terminated by '\001'  //指定列分隔符’\001’

   collection items terminated by '\002'           //指定集合元素间的分隔符’\002’

   map keys terminated by '\003'                //指定类型为MAP字段的键值对分割符

   lines terminated by '\n'                      //指定行分割符

   stored as textfile                           //存储的文件格式

   location '/user/hive/warehouse/test.db/student';   

   

   create table if not exists test.student(

   id string comment 'student id',

   name string comment 'student name',

   sex string comment 'student sex',

   age int comment 'student age'

   )

   comment 'the info of student'                

   row format delimited fields terminated by '\t'                 

   lines terminated by '\n'                      

   stored as textfile;   

 

   create table if not exists student2 like student;   //复制已存在的表结构创建表(不复制数据)

 

   Hive中建表默认为管理表(managed table),当表需要被其它工具分析时,需要建立外部表(exter nal table

   create exter nal table --    //建立外部表

 

  建立分区表

  create table student_info(

  id string,

  name string,

  age int)

  partitioned by (province string,city string);

 

7、查看表结构信息以及列的注释

   desc student;

8、查看表的详细信息

   desc formatted student;

9、查看分区表的详细信息

   describe extended student_info;

 

10、删除表

    drop table student;

    drop table if exists student;

 

11、表重命名

alter table student rename to student_1;

12、增加、修改、删除分区

    alter table student_info add partition (province='fujain',city='fuzhou') location  

    '/user/hive/warehouse/test/fujian/fuzhou'; //表必须为分区表,且provincecity为分区字段

   

   alter table student_info drop partition(province='fujian',city='fuzhou');

 

13、修改列信息

    alter table student_1

    change column age sage int

    comment 'the student age'

after name;

 

14、增加列

alter table student_1 add columns (new_col int);

 

15、装载数据

load data local inpath '/home/hadoop/student_1' into table student;    // 从本地加载数据

    load data inpath '/home/hadoop/student_1' into table student;    // HDFS上加载数据

 

16、通过查询语句向表中插入语句:

insert overwrite table student2 select * from student;

 

17、导出数据:

    insert overwrite directory '/hive_tmp' select * from student2;

insert overwrite local directory '/home/hadoop/hive_tmp' select * from student2;

 

18、简单查询语句:

select id,name,age from student;

select id,name,age from student limit 3;

 

19、在查询语句中对表数据进行简单处理:

select id,name,

case

when age ='10' then '10'

when age='12' then '12'

else age

end

from student;

 

20、查询语句中添加条件:

select id,name,age from student where age >13;

select id,name,age from student where age >13 or id <1002;

 

21、Group by 语句:

select age,count(*) from student group by age;

select age,count(*) from student group by age having age>12;

 

24、Join 语句

select t1.id,t2.id from student t1 join score t2 on t1.id = t2.id;   //内连接

select t1.id,t2.id from student t1 left outer join score t2 on t1.id = t2.id; //左连接

select t1.id,t2.id from student t1 right outer join score t2 on t1.id = t2.id; //右连接

select t1.id,t2.id from student t1 full outer join score t2 on t1.id = t2.id; //全外连接

select t1.id,t2.id from student t1 left semi join score t2 on t1.id = t2.id; //左半连接

select /*+mapjoin(t1)*/ t1.id,t2.id from student t1 join score t2 on t1.id = t2.id;//map端连接

 

25、排序语句:

select id,name,sex,age from student order by age asc,id desc;

 

26、分桶和抽样:

select * from student tablesample(bucket 1 out of 3 on id);

select * from student tablesample(bucket 1 out of 3 on rand());

 

27、Union All

    select t.id

from(

select t1.id from student t1

union all

select t2.id from score t2) t;