一、求出场率与出厂次数
1、有如下数据:(建表语句+sql查询)
id names
1 aa,bb,cc,dd,ee
2 aa,bb,ff,ww,qq
3 aa,cc,rr,yy
4 aa,bb,dd,oo,pp
2、求英雄的出场排名top3的出场次数及出场率
create table if not exists t_names(
id int,
names array
)
row format delimited
fields terminated by ‘ ’
collection items terminated by ‘,’
;
select *
from (
select name,cc,cc / (sum(cc) over()) as ccl,
rank() over(sort by cc desc) as rk
from (
select
name,
count(1) as cc
from t_names lateral view explode(names) tt as name
group by
name
) a
) aa
where aa.rk <= 3
;
二、求通话时长
1、有如下通话记录:
Zhangsan Wangwu 01:01:01
Zhangsan Zhaoliu 00:11:21
Zhangsan Yuqi 00:19:01
Zhangsan Jingba 00:21:01
Zhangsan Wuxi 01:31:17
Wangwu Zhaoliu 00:51:01
Wangwu Zhaoliu 01:11:19
Wangwu Yuqi 00:00:21
Wangwu Yuqi 00:23:01
Yuqi Zhaoliu 01:18:01
Yuqi Wuxi 00:18:00
Jingba Wangwu 00:01:01
Jingba Wangwu 00:00:06
Jingba Wangwu 00:02:04
Jingba Wangwu 00:02:54
Wangwu Yuqi 01:00:13
Wangwu Yuqi 00:01:01
Wangwu Zhangsan 00:01:01
2、统计两个人的通话总时长(用户之间互相通话的时长)
create table relations(
fromstr string,
tostr string,
time string
)
row format delimited
fields terminated by ’ ’
;
select
fromstr,
tostr,
sum(duration) as durations
from (
Select
Case when fromstr >= tostr then fromstr else tostr end fromstr,
Case when fromstr >= tostr then tostr else fromstr end tostr,
Split(time,’:’)[0] * 60 * 60 + Split(time,’:’)[1] * 60 + Split(time,’:’)[2] duration
from relations
) a
group by fromstr,tostr
;
三、求出每个店铺的当月销售额和累计到当月的总销售额
1、有如下销售数据:(建表语句+sql查询)
店铺 月份 金额
a,01,150
a,01,200
b,01,1000
b,01,800
c,01,250
c,01,220
b,01,6000
a,02,2000
a,02,3000
b,02,1000
b,02,1500
c,02,350
c,02,280
a,03,350
a,03,250
2、编写Hive的HQL语句求出每个店铺的当月销售额和累计到当月的总销售额
create table t_store(
name string,
months int,
money int
)
row format delimited fields terminated by “,”;
select name,months,amoney,sum(amoney) over(distribute by name sort by months asc rows between unbounded preceding and current row) as totalmomey
from (
Select name,months,sum(money) as amoney
From t_store
Group by name,months
) a
;
四、统计amt连续3个月,环比增长>50%的user
user_id month amt
1,20170101,100
3,20170101,20
4,20170101,30
1,20170102,200
2,20170102,240
3,20170102,30
4,20170102,2
1,20170101,180
2,20170101,250
3,20170101,30
4,20170101,260
…
…
select user_id
from(
select
user_id,month,mon_amt,pre_mon_amt,
sum(case when ((mon_amt - pre_mon_amt) / pre_mon_amt * 100) > 50
and datediff(to_date(month,‘yyyymm’),to_date(pre2_month,‘yyyymm’),‘mm’) = 2
then 1
else 0 end) over(partition by user_id order by month asc rows between current row and 2 following) as flag
from (
select
user_id,
substr(month,0,6) as month,
sum(amt) as mon_amt,
lag(sum(amt),1,0.00001) over(partition by user_id order by substr(month,0,6) asc ) as pre_mon_amt,
substr(lag(substr(month,0,6),2,‘199001’) over(partition by user_id order by substr(month,0,6) asc),0,6) as pre_2_mon
from amt
group by user_id,substr(month,0,6)
) t1
) t2
where t2.flag >=3;