一.需求
求每個(gè)用戶的最長(zhǎng)連續(xù)登錄天數(shù),兩個(gè)日期的間隔小于或等于 3 均視為連續(xù)登錄。比如 01-01 號(hào)登錄,最近的下一次登錄是 01-04 號(hào),兩個(gè)日期的間隔等于 3 天,因此這兩個(gè)日期之間的天數(shù)都算作活躍天數(shù),一共 4 天。
因?yàn)榭紤]MySQL版本問(wèn)題,暫時(shí)不能使用分析函數(shù)。
二.解決方案
測(cè)試數(shù)據(jù):
create table test_login(user_id int,login_date timestamp);
insert into test_login values (1,'2020-01-01 00:01:00');
insert into test_login values (1,'2020-01-01 00:02:00');
insert into test_login values (1,'2020-01-01 00:03:00');
insert into test_login values (1,'2020-01-02 00:03:00');
insert into test_login values (1,'2020-01-05 00:03:00');
insert into test_login values (1,'2020-01-07 00:03:00');
insert into test_login values (1,'2020-01-11 00:03:00');
insert into test_login values (1,'2020-01-12 00:03:00');
insert into test_login values (1,'2020-01-13 00:03:00');
insert into test_login values (1,'2020-01-14 00:03:00');
insert into test_login values (1,'2020-01-17 00:03:00');
insert into test_login values (1,'2020-01-18 00:03:00');
insert into test_login values (1,'2020-01-19 00:03:00');
insert into test_login values (1,'2020-01-22 00:03:00');
insert into test_login values (2,'2020-01-01 00:04:00');
insert into test_login values (2,'2020-01-01 00:05:00');
insert into test_login values (2,'2020-01-01 00:06:00');
insert into test_login values (2,'2020-01-02 00:06:00');
insert into test_login values (2,'2020-01-07 00:06:00');
insert into test_login values (2,'2020-01-11 00:06:00');
insert into test_login values (2,'2020-01-12 00:06:00');
insert into test_login values (2,'2020-01-14 00:06:00');
insert into test_login values (2,'2020-01-18 00:06:00');
insert into test_login values (2,'2020-01-19 00:06:00');
insert into test_login values (2,'2020-01-22 00:06:00');
代碼
SELECT user_id,
max(diff_days) + 1 max_diff_days
from
(
SELECT user_id,
flag2,
min(login_date) min_login_date,
max(login_date) max_login_date,
datediff(max(login_date),min(login_date)) diff_days
from
(
SELECT tmp3.user_id,
tmp3.login_date,
tmp3.flag1,
sum(tmp4.flag1) flag2
from
(
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
) tmp3 -- 打標(biāo)記 如上下間隔<=3 則為1 否則為0
left join
(
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
) tmp4 -- 功能同tmp3
on tmp3.user_id = tmp4.user_id
and tmp4.login_date <= tmp3.login_date
group by tmp3.user_id,tmp3.login_date,tmp3.flag1
) tmp5 -- 每一個(gè)用戶 每一個(gè)連續(xù)區(qū)間設(shè)置為不同的flag 標(biāo)記為flag2
group by user_id,flag2
) tmp6 -- 計(jì)算出每個(gè)連續(xù)區(qū)間 最大值和最小值 以及連續(xù)間隔天數(shù)
group by user_id
這樣看起來(lái)是不是覺(jué)得非常的復(fù)雜,下面我們拆解開(kāi)
2.1 同一天多次登陸去重
考慮同一天會(huì)有多次登陸的,此處用group by語(yǔ)句進(jìn)行去重,同一天只保留一條記錄
代碼:
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
測(cè)試記錄:
mysql> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d');
+---------+------------+
| user_id | login_date |
+---------+------------+
| 1 | 2020-01-01 |
| 1 | 2020-01-02 |
| 1 | 2020-01-05 |
| 1 | 2020-01-07 |
| 1 | 2020-01-11 |
| 1 | 2020-01-12 |
| 1 | 2020-01-13 |
| 1 | 2020-01-14 |
| 1 | 2020-01-17 |
| 1 | 2020-01-18 |
| 1 | 2020-01-19 |
| 1 | 2020-01-22 |
| 2 | 2020-01-01 |
| 2 | 2020-01-02 |
| 2 | 2020-01-07 |
| 2 | 2020-01-11 |
| 2 | 2020-01-12 |
| 2 | 2020-01-14 |
| 2 | 2020-01-18 |
| 2 | 2020-01-19 |
| 2 | 2020-01-22 |
+---------+------------+
21 rows in set (0.00 sec)
mysql>
2.2 求上一次的登陸日期
我們需要找到上一次登陸日期,與此次的登陸日期對(duì)比,才可以判斷是否符合小于等于3天的條件
此處,我們可以使用標(biāo)量子查詢來(lái)實(shí)現(xiàn)
需要記錄的每個(gè)用戶的第一條登陸信息上一天為null
代碼:
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
測(cè)試記錄:
mysql> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ;
+---------+------------+---------------+
| user_id | login_date | up_login_date |
+---------+------------+---------------+
| 1 | 2020-01-01 | NULL |
| 1 | 2020-01-02 | 2020-01-01 |
| 1 | 2020-01-05 | 2020-01-02 |
| 1 | 2020-01-07 | 2020-01-05 |
| 1 | 2020-01-11 | 2020-01-07 |
| 1 | 2020-01-12 | 2020-01-11 |
| 1 | 2020-01-13 | 2020-01-12 |
| 1 | 2020-01-14 | 2020-01-13 |
| 1 | 2020-01-17 | 2020-01-14 |
| 1 | 2020-01-18 | 2020-01-17 |
| 1 | 2020-01-19 | 2020-01-18 |
| 1 | 2020-01-22 | 2020-01-19 |
| 2 | 2020-01-01 | NULL |
| 2 | 2020-01-02 | 2020-01-01 |
| 2 | 2020-01-07 | 2020-01-02 |
| 2 | 2020-01-11 | 2020-01-07 |
| 2 | 2020-01-12 | 2020-01-11 |
| 2 | 2020-01-14 | 2020-01-12 |
| 2 | 2020-01-18 | 2020-01-14 |
| 2 | 2020-01-19 | 2020-01-18 |
| 2 | 2020-01-22 | 2020-01-19 |
+---------+------------+---------------+
21 rows in set (0.00 sec)
2.3 打標(biāo)記判斷是否間隔天數(shù)<=3
求出上一個(gè)登陸日期,與本次登陸日期比較,如果<=3,則標(biāo)記為0,否則標(biāo)記為1
需要注意的是上一步的空值直接標(biāo)記為1
這樣通過(guò)flag1 我們就可以看到連續(xù)登陸的區(qū)間了
代碼:
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
測(cè)試記錄:
mysql> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
-> ;
+---------+------------+-------+
| user_id | login_date | flag1 |
+---------+------------+-------+
| 1 | 2020-01-01 | 1 |
| 1 | 2020-01-02 | 0 |
| 1 | 2020-01-05 | 0 |
| 1 | 2020-01-07 | 0 |
| 1 | 2020-01-11 | 1 |
| 1 | 2020-01-12 | 0 |
| 1 | 2020-01-13 | 0 |
| 1 | 2020-01-14 | 0 |
| 1 | 2020-01-17 | 0 |
| 1 | 2020-01-18 | 0 |
| 1 | 2020-01-19 | 0 |
| 1 | 2020-01-22 | 0 |
| 2 | 2020-01-01 | 1 |
| 2 | 2020-01-02 | 0 |
| 2 | 2020-01-07 | 1 |
| 2 | 2020-01-11 | 1 |
| 2 | 2020-01-12 | 0 |
| 2 | 2020-01-14 | 0 |
| 2 | 2020-01-18 | 1 |
| 2 | 2020-01-19 | 0 |
| 2 | 2020-01-22 | 0 |
+---------+------------+-------+
21 rows in set (0.01 sec)
2.4 打分組的標(biāo)記
因?yàn)橥粋€(gè)用戶存在多個(gè)連續(xù)登陸的區(qū)間,根據(jù)上一個(gè)步驟的flag1沒(méi)辦法區(qū)分開(kāi),此時(shí)需要區(qū)分開(kāi)同一個(gè)用戶的不同連續(xù)登陸區(qū)間。
因?yàn)镸ySQL版本不支持分析函數(shù),只能通過(guò)臨時(shí)表表連接的方式實(shí)現(xiàn),再考慮去除重復(fù),需要用到分組語(yǔ)句進(jìn)行去重。
其實(shí)flag2是這個(gè)解決方案核心所在,同一個(gè)用戶第一次連續(xù)登陸區(qū)間標(biāo)記為1,第二次則累加為2,以此類(lèi)推。
代碼:
SELECT tmp3.user_id,
tmp3.login_date,
tmp3.flag1,
sum(tmp4.flag1) flag2
from
(
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
) tmp3 -- 打標(biāo)記 如上下間隔<=3 則為1 否則為0
left join
(
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
) tmp4 -- 功能同tmp3
on tmp3.user_id = tmp4.user_id
and tmp4.login_date <= tmp3.login_date
group by tmp3.user_id,tmp3.login_date,tmp3.flag1
測(cè)試記錄:
mysql> SELECT tmp3.user_id,
-> tmp3.login_date,
-> tmp3.flag1,
-> sum(tmp4.flag1) flag2
-> from
-> (
-> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
-> ) tmp3 -- 打標(biāo)記 如上下間隔<=3 則為1 否則為0
-> left join
-> (
-> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
-> ) tmp4 -- 功能同tmp3
-> on tmp3.user_id = tmp4.user_id
-> and tmp4.login_date <= tmp3.login_date
-> group by tmp3.user_id,tmp3.login_date,tmp3.flag1;
+---------+------------+-------+-------+
| user_id | login_date | flag1 | flag2 |
+---------+------------+-------+-------+
| 1 | 2020-01-01 | 1 | 1 |
| 1 | 2020-01-02 | 0 | 1 |
| 1 | 2020-01-05 | 0 | 1 |
| 1 | 2020-01-07 | 0 | 1 |
| 1 | 2020-01-11 | 1 | 2 |
| 1 | 2020-01-12 | 0 | 2 |
| 1 | 2020-01-13 | 0 | 2 |
| 1 | 2020-01-14 | 0 | 2 |
| 1 | 2020-01-17 | 0 | 2 |
| 1 | 2020-01-18 | 0 | 2 |
| 1 | 2020-01-19 | 0 | 2 |
| 1 | 2020-01-22 | 0 | 2 |
| 2 | 2020-01-01 | 1 | 1 |
| 2 | 2020-01-02 | 0 | 1 |
| 2 | 2020-01-07 | 1 | 2 |
| 2 | 2020-01-11 | 1 | 3 |
| 2 | 2020-01-12 | 0 | 3 |
| 2 | 2020-01-14 | 0 | 3 |
| 2 | 2020-01-18 | 1 | 4 |
| 2 | 2020-01-19 | 0 | 4 |
| 2 | 2020-01-22 | 0 | 4 |
+---------+------------+-------+-------+
21 rows in set (0.01 sec)
2.5 分組求最大的間隔天數(shù)
其實(shí)有了上一個(gè)步驟的flag2,就可以判斷區(qū)分開(kāi)每一個(gè)用戶的每一個(gè)連續(xù)登陸區(qū)間,直接進(jìn)行分組就可以求出該連續(xù)區(qū)間 最大和最小的登陸日期,兩個(gè)日期差值就是間隔天數(shù)。最后根據(jù)user_id進(jìn)行分組,求最大的間隔天數(shù)就是最終結(jié)果。
需要審題 01-01到01-04 間隔是3天,但是算4天連續(xù)登陸 所以datediff函數(shù)的結(jié)果要+1
代碼:
SELECT user_id,
max(diff_days) +1 max_diff_days
from
(
SELECT user_id,
flag2,
min(login_date) min_login_date,
max(login_date) max_login_date,
datediff(max(login_date),min(login_date)) diff_days
from
(
SELECT tmp3.user_id,
tmp3.login_date,
tmp3.flag1,
sum(tmp4.flag1) flag2
from
(
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
) tmp3 -- 打標(biāo)記 如上下間隔<=3 則為1 否則為0
left join
(
SELECT user_id,
login_date,
case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
from
(
SELECT user_id,
login_date,
(select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
) tmp4 -- 功能同tmp3
on tmp3.user_id = tmp4.user_id
and tmp4.login_date <= tmp3.login_date
group by tmp3.user_id,tmp3.login_date,tmp3.flag1
) tmp5 -- 每一個(gè)用戶 每一個(gè)連續(xù)區(qū)間設(shè)置為不同的flag 標(biāo)記為flag2
group by user_id,flag2
) tmp6 -- 計(jì)算出每個(gè)連續(xù)區(qū)間 最大值和最小值 以及連續(xù)間隔天數(shù)
group by user_id
測(cè)試記錄:
mysql> SELECT user_id,
-> flag2,
-> min(login_date) min_login_date,
-> max(login_date) max_login_date,
-> datediff(max(login_date),min(login_date)) diff_days
-> from
-> (
-> SELECT tmp3.user_id,
-> tmp3.login_date,
-> tmp3.flag1,
-> sum(tmp4.flag1) flag2
-> from
-> (
-> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
-> ) tmp3 -- 打標(biāo)記 如上下間隔<=3 則為1 否則為0
-> left join
-> (
-> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
-> ) tmp4 -- 功能同tmp3
-> on tmp3.user_id = tmp4.user_id
-> and tmp4.login_date <= tmp3.login_date
-> group by tmp3.user_id,tmp3.login_date,tmp3.flag1
-> ) tmp5 -- 每一個(gè)用戶 每一個(gè)連續(xù)區(qū)間設(shè)置為不同的flag 標(biāo)記為flag2
-> group by user_id,flag2;
+---------+-------+----------------+----------------+-----------+
| user_id | flag2 | min_login_date | max_login_date | diff_days |
+---------+-------+----------------+----------------+-----------+
| 1 | 1 | 2020-01-01 | 2020-01-07 | 6 |
| 1 | 2 | 2020-01-11 | 2020-01-22 | 11 |
| 2 | 1 | 2020-01-01 | 2020-01-02 | 1 |
| 2 | 2 | 2020-01-07 | 2020-01-07 | 0 |
| 2 | 3 | 2020-01-11 | 2020-01-14 | 3 |
| 2 | 4 | 2020-01-18 | 2020-01-22 | 4 |
+---------+-------+----------------+----------------+-----------+
6 rows in set (0.01 sec)
mysql>
mysql> SELECT user_id,
-> max(diff_days) +1 max_diff_days
-> from
-> (
-> SELECT user_id,
-> flag2,
-> min(login_date) min_login_date,
-> max(login_date) max_login_date,
-> datediff(max(login_date),min(login_date)) diff_days
-> from
-> (
-> SELECT tmp3.user_id,
-> tmp3.login_date,
-> tmp3.flag1,
-> sum(tmp4.flag1) flag2
-> from
-> (
-> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
-> ) tmp3 -- 打標(biāo)記 如上下間隔<=3 則為1 否則為0
-> left join
-> (
-> SELECT user_id,
-> login_date,
-> case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
-> from
-> (
-> SELECT user_id,
-> login_date,
-> (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
-> from
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ) tmp1 -- tmp1表 分組去重 一天多次登陸的算一條
-> ) tmp2 -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
-> ) tmp4 -- 功能同tmp3
-> on tmp3.user_id = tmp4.user_id
-> and tmp4.login_date <= tmp3.login_date
-> group by tmp3.user_id,tmp3.login_date,tmp3.flag1
-> ) tmp5 -- 每一個(gè)用戶 每一個(gè)連續(xù)區(qū)間設(shè)置為不同的flag 標(biāo)記為flag2
-> group by user_id,flag2
-> ) tmp6 -- 計(jì)算出每個(gè)連續(xù)區(qū)間 最大值和最小值 以及連續(xù)間隔天數(shù)
-> group by user_id
-> ;
+---------+---------------+
| user_id | max_diff_days |
+---------+---------------+
| 1 | 12 |
| 2 | 5 |
+---------+---------------+
2 rows in set (0.00 sec)
mysql>
三.MySQL 8.0寫(xiě)法
MySQL 8.0的with語(yǔ)句以及分析窗口函數(shù),可以使上面的解決方案的代碼簡(jiǎn)潔度大大提升
代碼:
with tmp1 AS
-- tmp1臨時(shí)表 一天多次登陸算一次
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
from test_login
group by user_id,date_format(login_date,'%Y-%m-%d')
),
tmp2 as
-- tmp2臨時(shí)表 通過(guò)lag獲取上一次登陸時(shí)間 如是第一條給默認(rèn)值
(
select user_id,
login_date,
lag(login_date,1,'1900-01-01') over(partition by user_id order by login_date) up_login_date
from tmp1
),
tmp3 AS
-- 判斷是否符合3天內(nèi)標(biāo)準(zhǔn) 打標(biāo)記flag1
(
select user_id,
login_date,
case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end as flag1
from tmp2
),
tmp4 AS
-- 通過(guò)分析函數(shù)將每個(gè)用戶 每一個(gè)連續(xù)登陸期間進(jìn)行標(biāo)記 flag2
(
select user_id,
login_date,
flag1,
sum(flag1) over(partition by user_id order by login_date) as flag2
from tmp3
),
tmp5 AS
-- 通過(guò)user_id flag2進(jìn)行分組
(
SELECT user_id,
flag2,
min(login_date) min_login_date,
max(login_date) max_login_date,
datediff(max(login_date),min(login_date)) diff_days
from tmp4
group by user_id,flag2
)
SELECT user_id,
max(diff_days) + 1 as diff_days
from tmp5
group by user_id;
測(cè)試記錄:
mysql> select version();
+-----------+
| version() |
+-----------+
| 8.0.19 |
+-----------+
1 row in set (0.00 sec)
mysql> with tmp1 AS
-> -- tmp1臨時(shí)表 一天多次登陸算一次
-> (
-> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
-> from test_login
-> group by user_id,date_format(login_date,'%Y-%m-%d')
-> ),
-> tmp2 as
-> -- tmp2臨時(shí)表 通過(guò)lag獲取上一次登陸時(shí)間 如是第一條給默認(rèn)值
-> (
-> select user_id,
-> login_date,
-> lag(login_date,1,'1900-01-01') over(partition by user_id order by login_date) up_login_d
-> from tmp1
-> ),
-> tmp3 AS
-> -- 判斷是否符合3天內(nèi)標(biāo)準(zhǔn) 打標(biāo)記flag1
-> (
-> select user_id,
-> login_date,
-> case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end as flag1
-> from tmp2
-> ),
-> tmp4 AS
-> -- 通過(guò)分析函數(shù)將每個(gè)用戶 每一個(gè)連續(xù)登陸期間進(jìn)行標(biāo)記 flag2
-> (
-> select user_id,
-> login_date,
-> flag1,
-> sum(flag1) over(partition by user_id order by login_date) as flag2
-> from tmp3
-> ),
-> tmp5 AS
-> -- 通過(guò)user_id flag2進(jìn)行分組
-> (
-> SELECT user_id,
-> flag2,
-> min(login_date) min_login_date,
-> max(login_date) max_login_date,
-> datediff(max(login_date),min(login_date)) diff_days
-> from tmp4
-> group by user_id,flag2
-> )
-> SELECT user_id,
-> max(diff_days) + 1 as diff_days
-> from tmp5
-> group by user_id;
+---------+-----------+
| user_id | diff_days |
+---------+-----------+
| 1 | 12 |
| 2 | 5 |
+---------+-----------+
2 rows in set (0.00 sec)