Hive窗口函数面试题(带答案版本)

时间:2024-04-03 16:17:06

Hive笔试题实战

短视频

题目一:计算各个视频的平均完播率

有用户-视频互动表tb_user_video_log:

id

uid

video_id

start_time

end_time

if_follow

if_like

if_retweet

comment_id

1

101

2001

2021-10-01 10:00:00

2021-10-01 10:00:30

0

1

1

NULL

2

102

2001

2021-10-01 10:00:00

2021-10-01 10:00:24

0

0

1

NULL

3

103

2001

2021-10-01 11:00:00

2021-10-01 11:00:34

0

1

0

1732526

4

101

2002

2021-09-01 10:00:00

2021-09-01 10:00:42

1

0

1

NULL

5

102

2002

2021-10-01 11:00:00

2021-10-01 11:00:30

1

0

1

NULL

uid-用户ID,video_id-视频ID,start_time-开始观看时间,end_time-结束观看时间,if_follow-是否关注,if_like-是否点赞,if_retweet-是否转发,comment_id-评论ID。

有短视频信息表tb_video_info:

id

video_id

author

tag

duration

release_time

1

2001

901

影视

30

2021-01-01 07:00:00

2

2002

901

美食

60

2021-01-01 07:00:00

3

2003

902

旅游

90

2021-01-01 07:00:00

video_id-视频ID,author-创作者ID,tag-类别标签,duration-视频时长(秒),release_time-发布时间。

问题:计算2021年里有播放记录的每个视频的完播率(结果保留三位小数),并按完播率降序排序。输出结果如下:

video_id

avg_comp_play_rate

2001

0.667

2002

0.000

注:视频完播率是指完成播放次数占总播放次数的比例。简单起见,结束观看时间与开始播放时间的差≥视频时长时,视为完成播放。

-- Setup: (re)create the user-video interaction table.
-- start_time / end_time are declared as STRING columns.
DROP TABLE IF EXISTS tb_user_video_log;

CREATE TABLE tb_user_video_log
(
    id         INT    COMMENT '自增ID',
    uid        INT    COMMENT '用户ID',
    video_id   INT    COMMENT '视频ID',
    start_time STRING COMMENT '开始观看时间',
    end_time   STRING COMMENT '结束观看时间',
    if_follow  INT    COMMENT '是否关注',
    if_like    INT    COMMENT '是否点赞',
    if_retweet INT    COMMENT '是否转发',
    comment_id INT    COMMENT '评论ID'
)
    COMMENT '用户-视频互动表'
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ',';

-- Setup: (re)create the short-video metadata table.
-- duration is an INT (seconds, per its column comment); release_time is a STRING.
DROP TABLE IF EXISTS tb_video_info;

CREATE TABLE tb_video_info
(
    id           INT    COMMENT '自增ID',
    video_id     INT    COMMENT '视频ID',
    author       INT    COMMENT '创作者ID',
    tag          STRING COMMENT '类别标签',
    duration     INT    COMMENT '视频时长(秒数)',
    release_time STRING COMMENT '发布时间'
)
    COMMENT '短视频信息表'
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ',';

-- Load the sample rows for problem 1.
-- Interaction columns, in order:
--   id, uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id
INSERT INTO tb_user_video_log
VALUES
    (1, 101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:30', 0, 1, 1, NULL),
    (2, 102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:24', 0, 0, 1, NULL),
    (3, 103, 2001, '2021-10-01 11:00:00', '2021-10-01 11:00:34', 0, 1, 0, 1732526),
    (4, 101, 2002, '2021-09-01 10:00:00', '2021-09-01 10:00:42', 1, 0, 1, NULL),
    (5, 102, 2002, '2021-10-01 11:00:00', '2021-10-01 11:00:30', 1, 0, 1, NULL);

-- Video columns, in order: id, video_id, author, tag, duration, release_time
INSERT INTO tb_video_info
VALUES
    (1, 2001, 901, '影视', 30, '2021-01-01 7:00:00'),
    (2, 2002, 901, '美食', 60, '2021-01-01 7:00:00'),
    (3, 2003, 902, '旅游', 90, '2021-01-01 7:00:00');

参考答案:

-- Step 1: keep only the play records whose start_time falls in 2021.
SELECT *
FROM tb_user_video_log
WHERE year(start_time) = 2021;

-- Step 2: count the completed plays of each video.
-- A play is complete when end_time - start_time (in seconds) >= the video's duration.
WITH plays_2021 AS (
    SELECT *
    FROM tb_user_video_log
    WHERE year(start_time) = 2021
)
SELECT p.video_id AS video_id,
       sum(CASE
               WHEN unix_timestamp(p.end_time) - unix_timestamp(p.start_time) >= v.duration THEN 1
               ELSE 0
           END)
FROM plays_2021 p
LEFT JOIN tb_video_info v
    ON p.video_id = v.video_id
GROUP BY p.video_id;

-- Step 3: completion rate = completed plays / total plays, per video.
WITH plays_2021 AS (
    SELECT *
    FROM tb_user_video_log
    WHERE year(start_time) = 2021
)
SELECT p.video_id AS video_id,
       sum(CASE
               WHEN unix_timestamp(p.end_time) - unix_timestamp(p.start_time) >= v.duration THEN 1
               ELSE 0
           END) / count(*)
FROM plays_2021 p
LEFT JOIN tb_video_info v
    ON p.video_id = v.video_id
GROUP BY p.video_id;

-- Step 4 (final answer): round the completion rate to three decimals
-- and sort the videos by that rate, highest first.
WITH plays_2021 AS (
    SELECT *
    FROM tb_user_video_log
    WHERE year(start_time) = 2021
)
SELECT p.video_id AS video_id,
       round(
           sum(CASE
                   WHEN unix_timestamp(p.end_time) - unix_timestamp(p.start_time) >= v.duration THEN 1
                   ELSE 0
               END) / count(*),
           3
       ) AS avg_comp_play_rate
FROM plays_2021 p
LEFT JOIN tb_video_info v
    ON p.video_id = v.video_id
GROUP BY p.video_id
ORDER BY avg_comp_play_rate DESC;

题目二:平均播放进度大于60%的视频类别

有用户-视频互动表tb_user_video_log:

id

uid

video_id

start_time

end_time

if_follow

if_like

if_retweet

comment_id

1

101

2001

2021-10-01 10:00:00

2021-10-01 10:00:30

0

1

1

NULL

2

102

2001

2021-10-01 10:00:00

2021-10-01 10:00:21

0

0

1

NULL

3

103

2001

2021-10-01 11:00:50

2021-10-01 11:01:20

0

1

0

1732526

4

102

2002

2021-10-01 11:00:00

2021-10-01 11:00:30

1

0

1

NULL

5

103

2002

2021-10-01 10:59:05

2021-10-01 11:00:05

1

0

1

NULL

uid-用户ID,video_id-视频ID,start_time-开始观看时间,end_time-结束观看时间,if_follow-是否关注,if_like-是否点赞,if_retweet-是否转发,comment_id-评论ID。

有短视频信息表tb_video_info:

id

video_id

author

tag

duration

release_time

1

2001

901

影视

30

2021-01-01 07:00:00

2

2002

901

美食

60

2021-01-01 07:00:00

3

2003

902

旅游

90

2021-01-01 07:00:00

video_id-视频ID,author-创作者ID,tag-类别标签,duration-视频时长(秒),release_time-发布时间。

问题:计算各类视频的平均播放进度,将进度大于60%的类别输出(结果保留两位小数,并按播放进度倒序排序)。示例数据的输出结果如下:

tag

avg_play_progress

影视

90.00%

美食

75.00%

注:播放进度=播放时长÷视频时长*100%,当播放时长大于视频时长时,播放进度均记为100%。

例如:影视类视频2001被用户101、102、103看过,播放进度分别为:30秒(100%)、21秒(70%)、30秒(100%),平均播放进度为(100%+70%+100%)/3=90.00%(保留两位小数)。

-- Setup for problem 2: (re)create the user-video interaction table.
-- start_time / end_time are declared as STRING columns.
DROP TABLE IF EXISTS tb_user_video_log;

CREATE TABLE tb_user_video_log
(
    id         INT    COMMENT '自增ID',
    uid        INT    COMMENT '用户ID',
    video_id   INT    COMMENT '视频ID',
    start_time STRING COMMENT '开始观看时间',
    end_time   STRING COMMENT '结束观看时间',
    if_follow  INT    COMMENT '是否关注',
    if_like    INT    COMMENT '是否点赞',
    if_retweet INT    COMMENT '是否转发',
    comment_id INT    COMMENT '评论ID'
)
    COMMENT '用户-视频互动表'
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ',';

-- Setup for problem 2: (re)create the short-video metadata table.
-- duration is an INT (seconds, per its column comment); release_time is a STRING.
DROP TABLE IF EXISTS tb_video_info;

CREATE TABLE tb_video_info
(
    id           INT    COMMENT '自增ID',
    video_id     INT    COMMENT '视频ID',
    author       INT    COMMENT '创作者ID',
    tag          STRING COMMENT '类别标签',
    duration     INT    COMMENT '视频时长(秒数)',
    release_time STRING COMMENT '发布时间'
)
    COMMENT '短视频信息表'
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ',';

-- Load the sample rows for problem 2.
-- Interaction columns, in order:
--   id, uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id
INSERT INTO tb_user_video_log
VALUES
    (1, 101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:30', 0, 1, 1, NULL),
    (2, 102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:21', 0, 0, 1, NULL),
    (3, 103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:20', 0, 1, 0, 1732526),
    (4, 102, 2002, '2021-10-01 11:00:00', '2021-10-01 11:00:30', 1, 0, 1, NULL),
    (5, 103, 2002, '2021-10-01 10:59:05', '2021-10-01 11:00:05', 1, 0, 1, NULL);

-- Video columns, in order: id, video_id, author, tag, duration, release_time
INSERT INTO tb_video_info
VALUES
    (1, 2001, 901, '影视', 30, '2021-01-01 7:00:00'),
    (2, 2002, 901, '美食', 60, '2021-01-01 7:00:00'),
    (3, 2003, 902, '旅游', 90, '2021-01-01 7:00:00');

参考答案:

-- Step 1: watched time, in seconds, of every play record.
SELECT video_id,
       unix_timestamp(end_time) - unix_timestamp(start_time) AS total_time
FROM tb_user_video_log;

-- Step 2: progress of each play = watched seconds / video duration, capped at 1.
WITH play_seconds AS (
    SELECT video_id,
           unix_timestamp(end_time) - unix_timestamp(start_time) AS total_time
    FROM tb_user_video_log
)
SELECT s.video_id AS video_id,
       CASE
           WHEN s.total_time / v.duration > 1 THEN 1
           ELSE s.total_time / v.duration
       END AS play_progress
FROM play_seconds s
LEFT JOIN tb_video_info v
    ON s.video_id = v.video_id;

-- Step 3: average the capped play progress over all plays of each video tag.
WITH play_seconds AS (
    SELECT video_id,
           unix_timestamp(end_time) - unix_timestamp(start_time) AS total_time
    FROM tb_user_video_log
)
SELECT v.tag,
       avg(CASE
               WHEN s.total_time / v.duration > 1 THEN 1
               ELSE s.total_time / v.duration
           END) AS avg_play_progress
FROM play_seconds s
LEFT JOIN tb_video_info v
    ON s.video_id = v.video_id
GROUP BY v.tag;

-- Step 4: keep the tags whose average progress exceeds 60% and sort them,
-- highest average first.
WITH play_seconds AS (
    SELECT video_id,
           unix_timestamp(end_time) - unix_timestamp(start_time) AS total_time
    FROM tb_user_video_log
)
SELECT v.tag,
       avg(CASE
               WHEN s.total_time / v.duration > 1 THEN 1
               ELSE s.total_time / v.duration
           END) AS avg_play_progress
FROM play_seconds s
LEFT JOIN tb_video_info v
    ON s.video_id = v.video_id
GROUP BY v.tag
HAVING avg_play_progress > 0.6
ORDER BY avg_play_progress DESC;

-- Step 5 (final answer): render the average progress as a percentage string
-- with exactly two decimals, e.g. '90.00%'.
--
-- Two fixes over sorting inside the subquery and formatting with round():
--   * The ORDER BY is on the outermost query. A sort inside a subquery does not
--     guarantee the order of the outer result (Hive 3.0+ removes an ORDER BY
--     without LIMIT from subqueries).
--   * The sort key is the numeric ratio, not the formatted string; sorting the
--     string would put '100.00%' before '75.00%'.
--   * format_number(x, 2) always emits two decimals, whereas round(x, 2) on a
--     double prints 90.0.
-- NOTE(review): ordering by a subquery column that is not in the select list
-- needs a Hive version that supports it -- confirm on the target cluster.
SELECT t.tag AS tag,
       concat(format_number(t.avg_progress_ratio * 100, 2), '%') AS avg_play_progress
FROM (
    SELECT b.tag AS tag,
           -- per-play progress is capped at 1 (100%) before averaging
           avg(if(a.total_time / b.duration > 1, 1, a.total_time / b.duration)) AS avg_progress_ratio
    FROM (
        SELECT video_id,
               unix_timestamp(end_time) - unix_timestamp(start_time) AS total_time
        FROM tb_user_video_log
    ) a
    LEFT JOIN tb_video_info b
        ON a.video_id = b.video_id
    GROUP BY b.tag
    HAVING avg_progress_ratio > 0.6
) t
ORDER BY t.avg_progress_ratio DESC;

题目三:每类视频近一个月的转发量/率

有用户-视频互动表tb_user_video_log:

id

uid

video_id

start_time

end_time

if_follow

if_like

if_retweet

comment_id

1

101

2001

2021-10-01 10:00:00

2021-10-01 10:00:20

0

1

1

NULL

2

102

2001

2021-10-01 10:00:00

2021-10-01 10:00:15

0

0

1

NULL

3

103

2001

2021-10-01 11:00:50

2021-10-01 11:01:15

0

1

0

1732526

4

102

2002

2021-09-10 11:00:00

2021-09-10 11:00:30

1

0

1

NULL

5

103

2002

2021-10-01 10:59:05

2021-10-01 11:00:05

1

0

0

NULL

uid-用户ID,video_id-视频ID,start_time-开始观看时间,end_time-结束观看时间,if_follow-是否关注,if_like-是否点赞,if_retweet-是否转发,comment_id-评论ID。

有短视频信息表tb_video_info:

id

video_id

author

tag

duration

release_time

1

2001

901

影视

30

2021-01-01 07:00:00

2

2002

901

美食

60

2021-01-01 07:00:00

3

2003

902

旅游

90

2021-01-01 07:00:00

video_id-视频ID,author-创作者ID,tag-类别标签,duration-视频时长(秒),release_time-发布时间。

问题:统计在有用户互动的最近一个月(按包含当天在内的近30天算,比如10月31日的近30天为10.2~10.31之间的数据)中,每类视频的转发量和转发率(保留3位小数)。输出结果如下:

tag

retweet_cut

retweet_rate

影视

2

0.667

美食

1

0.500

注:转发率=转发量÷播放量。结果按转发率降序排序。

解释:由表tb_user_video_log的数据可得,数据转储当天为2021年10月1日。近30天内,影视类视频2001共有3次播放记录,被转发2次,转发率为0.667;美食类视频2002共有2次播放记录,1次被转发,转发率为0.500。

-- Setup for problem 3: (re)create the user-video interaction table.
-- start_time / end_time are declared as STRING columns.
DROP TABLE IF EXISTS tb_user_video_log;

CREATE TABLE tb_user_video_log
(
    id         INT    COMMENT '自增ID',
    uid        INT    COMMENT '用户ID',
    video_id   INT    COMMENT '视频ID',
    start_time STRING COMMENT '开始观看时间',
    end_time   STRING COMMENT '结束观看时间',
    if_follow  INT    COMMENT '是否关注',
    if_like    INT    COMMENT '是否点赞',
    if_retweet INT    COMMENT '是否转发',
    comment_id INT    COMMENT '评论ID'
)
    COMMENT '用户-视频互动表'
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ',';

-- Setup for problem 3: (re)create the short-video metadata table.
-- duration is an INT (seconds, per its column comment); release_time is a STRING.
DROP TABLE IF EXISTS tb_video_info;

CREATE TABLE tb_video_info
(
    id           INT    COMMENT '自增ID',
    video_id     INT    COMMENT '视频ID',
    author       INT    COMMENT '创作者ID',
    tag          STRING COMMENT '类别标签',
    duration     INT    COMMENT '视频时长(秒数)',
    release_time STRING COMMENT '发布时间'
)
    COMMENT '短视频信息表'
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ',';

-- Load the sample rows for problem 3.
-- Interaction columns, in order:
--   id, uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id
INSERT INTO tb_user_video_log
VALUES
    (1, 101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:20', 0, 1, 1, NULL),
    (2, 102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:15', 0, 0, 1, NULL),
    (3, 103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:15', 0, 1, 0, 1732526),
    (4, 102, 2002, '2021-09-10 11:00:00', '2021-09-10 11:00:30', 1, 0, 1, NULL),
    (5, 103, 2002, '2021-10-01 10:59:05', '2021-10-01 11:00:05', 1, 0, 0, NULL);

-- Video columns, in order: id, video_id, author, tag, duration, release_time
INSERT INTO tb_video_info
VALUES
    (1, 2001, 901, '影视', 30, '2021-01-01 7:00:00'),
    (2, 2002, 901, '美食', 60, '2021-01-01 7:00:00'),
    (3, 2003, 902, '旅游', 90, '2021-01-01 7:00:00');

参考答案:

-- Step 1: find the start time of the most recent play.
SELECT max(start_time)
FROM tb_user_video_log;

-- Step 2: keep the plays of the latest 30 days, counting the last day itself,
-- i.e. plays that started at most 29 days before the most recent play.
-- The one-row subquery is cross-joined so every play can be compared with last_date.
SELECT *
FROM tb_user_video_log a
CROSS JOIN (
    SELECT max(start_time) AS last_date
    FROM tb_user_video_log
) b
WHERE datediff(b.last_date, a.start_time) <= 29;

-- Step 3 (final answer): per video tag, the number of retweets and the
-- retweet rate (retweets / plays, three decimals) over the latest 30 days,
-- sorted by retweet rate, highest first.
WITH recent_plays AS (
    SELECT *
    FROM tb_user_video_log a
    CROSS JOIN (
        SELECT max(start_time) AS last_date
        FROM tb_user_video_log
    ) b
    WHERE datediff(b.last_date, a.start_time) <= 29
)
SELECT v.tag                                  AS tag,
       sum(r.if_retweet)                      AS retweet_cut,
       round(sum(r.if_retweet) / count(*), 3) AS retweet_rate
FROM recent_plays r
LEFT JOIN tb_video_info v
    ON r.video_id = v.video_id
GROUP BY v.tag
ORDER BY retweet_rate DESC;

题目四:每个创作者每月的涨粉率及截止当前的总粉丝量

有用户-视频互动表tb_user_video_log:

id

uid

video_id

start_time

end_time

if_follow

if_like

if_retweet

comment_id

1

101

2001

2021-09-01 10:00:00

2021-09-01 10:00:20

0

1

1

NULL

2

105

2002

2021-09-10 11:00:00

2021-09-10 11:00:30

1

0

1

NULL

3

101

2001

2021-10-01 10:00:00

2021-10-01 10:00:20

1

1

1

NULL

4

102

2001

2021-10-01 10:00:00

2021-10-01 10:00:15

0

0

1

NULL

5

103

2001

2021-10-01 11:00:50

2021-10-01 11:01:15

1

1

0

1732526

6

106

2002

2021-10-01 10:59:05

2021-10-01 11:00:05

2

0

0

NULL

uid-用户ID,video_id-视频ID,start_time-开始观看时间,end_time-结束观看时间,if_follow-是否关注,if_like-是否点赞,if_retweet-是否转发,comment_id-评论ID。

有短视频信息表tb_video_info:

id

video_id

author

tag

duration

release_time

1

2001

901

影视

30

2021-01-01 07:00:00

2

2002

901

美食

60

2021-01-01 07:00:00

3

2003

902

旅游

90

2021-01-01 07:00:00

4

2004

902

美女

90

2020-01-01 08:00:00

video_id-视频ID,author-创作者ID,tag-类别标签,duration-视频时长(秒),release_time-发布时间。

问题:计算2021年里每个创作者每月的涨粉率及截止当月的总粉丝量。输出结果如下:

author

month

fans_growth_rate

total_fans

901

2021-09

0.500

1

901

2021-10

0.250

2

注:涨粉率=(加粉量 - 掉粉量) / 播放量。结果按创作者ID、总粉丝量升序排序。if_follow-是否关注,为1表示用户观看视频中关注了视频创作者,为0表示此次互动前后关注状态未发生变化,为2表示本次观看过程中取消了关注。

解释:示例数据中表tb_user_video_log里只有视频2001和2002的播放记录,都来自创作者901,播放时间在2021年9月和10月;其中9月里加粉量为1,掉粉量为0,播放量为2,因此涨粉率为0.500(保留3位小数);其中10月里加粉量为2,掉粉量为1,播放量为4,因此涨粉率为0.250,截止当前总粉丝数为2。

-- Setup for problem 4: (re)create the user-video interaction table.
-- start_time / end_time are declared as STRING columns.
DROP TABLE IF EXISTS tb_user_video_log;

CREATE TABLE tb_user_video_log
(
    id         INT    COMMENT '自增ID',
    uid        INT    COMMENT '用户ID',
    video_id   INT    COMMENT '视频ID',
    start_time STRING COMMENT '开始观看时间',
    end_time   STRING COMMENT '结束观看时间',
    if_follow  INT    COMMENT '是否关注',
    if_like    INT    COMMENT '是否点赞',
    if_retweet INT    COMMENT '是否转发',
    comment_id INT    COMMENT '评论ID'
)
    COMMENT '用户-视频互动表'
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ',';

-- Setup for problem 4: (re)create the short-video metadata table.
-- duration is an INT (seconds, per its column comment); release_time is a STRING.
DROP TABLE IF EXISTS tb_video_info;

CREATE TABLE tb_video_info
(
    id           INT    COMMENT '自增ID',
    video_id     INT    COMMENT '视频ID',
    author       INT    COMMENT '创作者ID',
    tag          STRING COMMENT '类别标签',
    duration     INT    COMMENT '视频时长(秒数)',
    release_time STRING COMMENT '发布时间'
)
    COMMENT '短视频信息表'
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ',';

-- Load the sample rows for problem 4.
-- Interaction columns, in order:
--   id, uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id
-- if_follow here takes the values 0, 1 and 2 (2 appears on row 6).
INSERT INTO tb_user_video_log
VALUES
    (1, 101, 2001, '2021-09-01 10:00:00', '2021-09-01 10:00:20', 0, 1, 1, NULL),
    (2, 105, 2002, '2021-09-10 11:00:00', '2021-09-10 11:00:30', 1, 0, 1, NULL),
    (3, 101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:20', 1, 1, 1, NULL),
    (4, 102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:15', 0, 0, 1, NULL),
    (5, 103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:15', 1, 1, 0, 1732526),
    (6, 106, 2002, '2021-10-01 10:59:05', '2021-10-01 11:00:05', 2, 0, 0, NULL);

-- Video columns, in order: id, video_id, author, tag, duration, release_time
INSERT INTO tb_video_info
VALUES
    (1, 2001, 901, '影视', 30, '2021-01-01 7:00:00'),
    (2, 2002, 901, '影视', 60, '2021-01-01 7:00:00'),
    (3, 2003, 902, '旅游', 90, '2020-01-01 7:00:00'),
    (4, 2004, 902, '美女', 90, '2020-01-01 8:00:00');

参考答案:

-- Step 1: take the 2021 play records and reduce each start_time to its
-- 'yyyy-MM' month.
SELECT video_id,
       date_format(start_time, 'yyyy-MM') AS m,
       if_follow
FROM tb_user_video_log
WHERE year(start_time) = 2021;

-- Step 2: per author and month, the net follower change and the play count.
-- if_follow = 2 (unfollow) counts as -1; any other value is summed as is.
WITH monthly_plays AS (
    SELECT video_id,
           date_format(start_time, 'yyyy-MM') AS m,
           if_follow
    FROM tb_user_video_log
    WHERE year(start_time) = 2021
)
SELECT v.author                                                     AS author,
       p.m                                                          AS m,
       sum(CASE WHEN p.if_follow = 2 THEN -1 ELSE p.if_follow END)  AS total_fans_m,
       count(*)                                                     AS total_play_m
FROM monthly_plays p
LEFT JOIN tb_video_info v
    ON p.video_id = v.video_id
GROUP BY v.author, p.m;

-- Step 3 (final answer): per author and month, the follower growth rate
-- (net follower change / plays, three decimals) and the running total of the
-- net follower change up to and including that month.
WITH monthly_plays AS (
    SELECT video_id,
           date_format(start_time, 'yyyy-MM') AS m,
           if_follow
    FROM tb_user_video_log
    WHERE year(start_time) = 2021
),
author_monthly AS (
    SELECT v.author                                                    AS author,
           p.m                                                         AS m,
           sum(CASE WHEN p.if_follow = 2 THEN -1 ELSE p.if_follow END) AS total_fans_m,
           count(*)                                                    AS total_play_m
    FROM monthly_plays p
    LEFT JOIN tb_video_info v
        ON p.video_id = v.video_id
    GROUP BY v.author, p.m
)
SELECT author,
       m                                     AS `month`,
       round(total_fans_m / total_play_m, 3) AS fans_growth_rate,
       -- running sum over the author's months, in month order
       sum(total_fans_m) OVER (
           PARTITION BY author
           ORDER BY m
           ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
       )                                     AS total_fans
FROM author_monthly
ORDER BY author, total_fans;

题目五:国庆期间每类视频点赞量和转发量

有用户-视频互动表tb_user_video_log:

id

uid

video_id

start_time

end_time

if_follow

if_like

if_retweet

comment_id

1

101

2001

2021-09-24 10:00:00

2021-09-24 10:00:20

1

1

0

NULL

2

105

2002

2021-09-25 11:00:00

2021-09-25 11:00:30

0

0

1

NULL

3

102

2002

2021-09-25 11:00:00

2021-09-25 11:00:30

1

1

1

NULL

4

101

2002

2021-09-26 11:00:00

2021-09-26 11:00:30

1

0

1

NULL

5

101

2002

2021-09-27 11:00:00

2021-09-27 11:00:30

1

1

0

NULL

6

102

2002

2021-09-28 11:00:00

2021-09-28 11:00:30

1

0

1

NULL

7

103

2002

2021-09-29 11:00:00

2021-09-29 11:00:30

1

0

1

NULL

8

102

2002

2021-09-30 11:00:00

2021-09-30 11:00:30

1

1

1

NULL

9

101

2001

2021-10-01 10:00:00

2021-10-01 10:00:20

1

1

0

NULL

10

102

2001

2021-10-01 10:00:00

2021-10-01 10:00:15

0

0

1

NULL

11

103

2001

2021-10-01 11:00:50

2021-10-01 11:01:15

1

1

0

1732526

12

106

2002

2021-10-02 10:59:05

2021-10-02 11:00:05

2

0

1

NULL

13

107

2002

2021-10-02 10:59:05

2021-10-02 11:00:05

1

0

1

NULL

14

108

2002

2021-10-02 10:59:05

2021-10-02 11:00:05

1

1

1

NULL

15

109

2002

2021-10-03 10:59:05

2021-10-03 11:00:05

0

1

0

NULL

uid-用户ID,video_id-视频ID,start_time-开始观看时间,end_time-结束观看时间,if_follow-是否关注,if_like-是否点赞,if_retweet-是否转发,comment_id-评论ID。

有短视频信息表tb_video_info:

id

video_id

author

tag

duration

release_time

1

2001

901

旅游

30

2021-01-01 07:00:00

2

2002

901

旅游

60

2021-01-01 07:00:00

3

2003

902

旅游

90

2021-01-01 07:00:00

4

2004

902

美女

90

2020-01-01 08:00:00

video_id-视频ID,author-创作者ID,tag-类别标签,duration-视频时长(秒),release_time-发布时间。

问题:统计2021年国庆头3天每类视频每天的近一周总点赞量和一周内最大单天转发量,结果按视频类别降序、日期升序排序。假设数据库中数据足够多,至少每个类别下国庆头3天及之前一周的每天都有播放记录。结果如下:

tag

dt

sum_like_cnt_7d

max_retweet_cnt_7d

旅游

2021-10-01

5

2

旅游

2021-10-02

5

3

旅游

2021-10-03

6

3

解释:由表tb_user_video_log里的数据可得只有旅游类视频的播放,2021年9月25到10月3日每天的点赞量和转发量如下:

tag

dt

like_cnt

retweet_cnt

旅游

2021-09-25

1

2

旅游

2021-09-26

0

1

旅游

2021-09-27

1

0

旅游

2021-09-28

0

1

旅游

2021-09-29

0

1

旅游

2021-09-30

1

1

旅游

2021-10-01

2

1

旅游

2021-10-02

1

3

旅游

2021-10-03

1

0

因此国庆头3天(10.01~10.03)里10.01的近7天(9.25~10.01)总点赞量为5次,单天最大转发量为2次(9月25那天最大);同理可得10.02和10.03的两个指标。

-- Setup for problem 5: (re)create the user-video interaction table.
-- start_time / end_time are declared as STRING columns.
DROP TABLE IF EXISTS tb_user_video_log;

CREATE TABLE tb_user_video_log
(
    id         INT    COMMENT '自增ID',
    uid        INT    COMMENT '用户ID',
    video_id   INT    COMMENT '视频ID',
    start_time STRING COMMENT '开始观看时间',
    end_time   STRING COMMENT '结束观看时间',
    if_follow  INT    COMMENT '是否关注',
    if_like    INT    COMMENT '是否点赞',
    if_retweet INT    COMMENT '是否转发',
    comment_id INT    COMMENT '评论ID'
)
    COMMENT '用户-视频互动表'
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ',';

-- Setup for problem 5: (re)create the short-video metadata table.
-- duration is an INT (seconds, per its column comment); release_time is a STRING.
DROP TABLE IF EXISTS tb_video_info;

CREATE TABLE tb_video_info
(
    id           INT    COMMENT '自增ID',
    video_id     INT    COMMENT '视频ID',
    author       INT    COMMENT '创作者ID',
    tag          STRING COMMENT '类别标签',
    duration     INT    COMMENT '视频时长(秒数)',
    release_time STRING COMMENT '发布时间'
)
    COMMENT '短视频信息表'
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ',';

-- Load the sample interaction rows for problem 5 (plays from 2021-09-24 to 2021-10-03).
-- Columns, in order:
--   id, uid, video_id, start_time, end_time, if_follow, if_like, if_retweet, comment_id
INSERT INTO tb_user_video_log
VALUES
    (1,  101, 2001, '2021-09-24 10:00:00', '2021-09-24 10:00:20', 1, 1, 0, NULL),
    (2,  105, 2002, '2021-09-25 11:00:00', '2021-09-25 11:00:30', 0, 0, 1, NULL),
    (3,  102, 2002, '2021-09-25 11:00:00', '2021-09-25 11:00:30', 1, 1, 1, NULL),
    (4,  101, 2002, '2021-09-26 11:00:00', '2021-09-26 11:00:30', 1, 0, 1, NULL),
    (5,  101, 2002, '2021-09-27 11:00:00', '2021-09-27 11:00:30', 1, 1, 0, NULL),
    (6,  102, 2002, '2021-09-28 11:00:00', '2021-09-28 11:00:30', 1, 0, 1, NULL),
    (7,  103, 2002, '2021-09-29 11:00:00', '2021-09-29 11:00:30', 1, 0, 1, NULL),
    (8,  102, 2002, '2021-09-30 11:00:00', '2021-09-30 11:00:30', 1, 1, 1, NULL),
    (9,  101, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:20', 1, 1, 0, NULL),
    (10, 102, 2001, '2021-10-01 10:00:00', '2021-10-01 10:00:15', 0, 0, 1, NULL),
    (11, 103, 2001, '2021-10-01 11:00:50', '2021-10-01 11:01:15', 1, 1, 0, 1732526),
    (12, 106, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 2, 0, 1, NULL),
    (13, 107, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 1, 0, 1, NULL),
    (14, 108, 2002, '2021-10-02 10:59:05', '2021-10-02 11:00:05', 1, 1, 1, NULL),
    (15, 109, 2002, '2021-10-03 10:59:05', '2021-10-03 11:00:05', 0, 1, 0, NULL);

-- Load the sample video metadata for problem 5.
-- Columns, in order: id, video_id, author, tag, duration, release_time
-- Videos 2001 and 2002 (the only ones with play records) are tagged '旅游' so
-- that the reference answer reproduces the expected output, which lists only
-- the '旅游' tag.
INSERT INTO tb_video_info
VALUES
    (1, 2001, 901, '旅游', 30, '2021-01-01 7:00:00'),
    (2, 2002, 901, '旅游', 60, '2021-01-01 7:00:00'),
    (3, 2003, 902, '旅游', 90, '2020-01-01 7:00:00'),
    (4, 2004, 902, '美女', 90, '2020-01-01 8:00:00');

参考答案:

-- Step 1: restrict the data to 2021-09-25 .. 2021-10-03 (both ends included).
-- datediff('2021-10-03', start_time) is 8 on 09-25 and 0 on 10-03; bounding it
-- on both sides also drops plays that started after 2021-10-03, which a bare
-- "< 9" test would keep because their datediff is negative.
SELECT video_id,
       date(start_time) AS dt,
       if_like,
       if_retweet
FROM tb_user_video_log
WHERE datediff('2021-10-03', start_time) BETWEEN 0 AND 8;

-- Step 2: per video tag and day, the number of likes and the number of retweets.
-- The date filter is bounded on both sides (2021-09-25 .. 2021-10-03) so plays
-- that started after 2021-10-03, whose datediff is negative, are excluded.
SELECT b.tag             AS tag,
       a.dt              AS dt,
       sum(a.if_like)    AS total_like_d,
       sum(a.if_retweet) AS total_retweet_d
FROM (
    SELECT video_id,
           date(start_time) AS dt,
           if_like,
           if_retweet
    FROM tb_user_video_log
    WHERE datediff('2021-10-03', start_time) BETWEEN 0 AND 8
) a
LEFT JOIN tb_video_info b
    ON a.video_id = b.video_id
GROUP BY b.tag, a.dt;

-- Step 3: per tag and day, the like total and the largest single-day retweet
-- count over that day and the six preceding rows.
-- The frame is row-based: it equals "the last 7 days" only because the problem
-- statement assumes every tag has play records on every day of the range.
-- The date filter is bounded on both sides (2021-09-25 .. 2021-10-03) so plays
-- that started after 2021-10-03, whose datediff is negative, are excluded.
SELECT tag,
       dt,
       sum(total_like_d) OVER (
           PARTITION BY tag ORDER BY dt ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
       ) AS sum_like_cnt_7d,
       max(total_retweet_d) OVER (
           PARTITION BY tag ORDER BY dt ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
       ) AS max_retweet_cnt_7d
FROM (
    SELECT b.tag             AS tag,
           a.dt              AS dt,
           sum(a.if_like)    AS total_like_d,
           sum(a.if_retweet) AS total_retweet_d
    FROM (
        SELECT video_id,
               date(start_time) AS dt,
               if_like,
               if_retweet
        FROM tb_user_video_log
        WHERE datediff('2021-10-03', start_time) BETWEEN 0 AND 8
    ) a
    LEFT JOIN tb_video_info b
        ON a.video_id = b.video_id
    GROUP BY b.tag, a.dt
) t1;

-- Step 4 (final answer): keep only 2021-10-01 .. 2021-10-03 and sort by tag
-- descending, then date ascending.
--
-- Fixes:
--   * The input range is bounded on both sides (2021-09-25 .. 2021-10-03).
--     A bare "datediff(...) < 9" also keeps plays after 2021-10-03, because
--     their datediff is negative.
--   * The final filter selects exactly the first three days of the holiday.
--     A "month(dt) = 10" test would also return every later October day
--     present in the data.
-- The 7-row frame equals "the last 7 days" only under the problem's assumption
-- that every tag has play records on every day of the range.
SELECT *
FROM (
    SELECT tag,
           dt,
           sum(total_like_d) OVER (
               PARTITION BY tag ORDER BY dt ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
           ) AS sum_like_cnt_7d,
           max(total_retweet_d) OVER (
               PARTITION BY tag ORDER BY dt ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
           ) AS max_retweet_cnt_7d
    FROM (
        SELECT b.tag             AS tag,
               a.dt              AS dt,
               sum(a.if_like)    AS total_like_d,
               sum(a.if_retweet) AS total_retweet_d
        FROM (
            SELECT video_id,
                   date(start_time) AS dt,
                   if_like,
                   if_retweet
            FROM tb_user_video_log
            WHERE datediff('2021-10-03', start_time) BETWEEN 0 AND 8
        ) a
        LEFT JOIN tb_video_info b
            ON a.video_id = b.video_id
        GROUP BY b.tag, a.dt
    ) t1
) t2
-- datediff is 2 on 10-01 and 0 on 10-03
WHERE datediff('2021-10-03', dt) BETWEEN 0 AND 2
ORDER BY tag DESC, dt ASC;

题目六:近一个月发布的视频中热度最高的top3视频

有用户-视频互动表tb_user_video_log:

id

uid

video_id

start_time

end_time

if_follow

if_like

if_retweet

comment_id

1

101

2001

2021-09-24 10:00:00

2021-09-24 10:00:30

1

1

1

NULL

2

101

2001

2021-10-01 10:00:00

2021-10-01 10:00:31

1

1

0

NULL