MySQL求連續(xù)登陸天數(shù)-鵝廠面試題

一.需求

求每個(gè)用戶的最長(zhǎng)連續(xù)登錄天數(shù),兩個(gè)日期的間隔小于或等于 3 均視為連續(xù)登錄。比如 01-01 號(hào)登錄,最近的下一次登錄是 01-04 號(hào),兩個(gè)日期的間隔等于 3 天,因此這兩個(gè)日期之間的天數(shù)都算作活躍天數(shù),一共 4 天。

因?yàn)榭紤]MySQL版本問(wèn)題,暫時(shí)不能使用分析函數(shù)。

二.解決方案

測(cè)試數(shù)據(jù):

create table test_login(user_id int,login_date timestamp);

insert into test_login values (1,'2020-01-01 00:01:00');
insert into test_login values (1,'2020-01-01 00:02:00');
insert into test_login values (1,'2020-01-01 00:03:00');
insert into test_login values (1,'2020-01-02 00:03:00');
insert into test_login values (1,'2020-01-05 00:03:00');
insert into test_login values (1,'2020-01-07 00:03:00');
insert into test_login values (1,'2020-01-11 00:03:00');
insert into test_login values (1,'2020-01-12 00:03:00');
insert into test_login values (1,'2020-01-13 00:03:00');
insert into test_login values (1,'2020-01-14 00:03:00');
insert into test_login values (1,'2020-01-17 00:03:00');
insert into test_login values (1,'2020-01-18 00:03:00');
insert into test_login values (1,'2020-01-19 00:03:00');
insert into test_login values (1,'2020-01-22 00:03:00');

insert into test_login values (2,'2020-01-01 00:04:00');
insert into test_login values (2,'2020-01-01 00:05:00');
insert into test_login values (2,'2020-01-01 00:06:00');
insert into test_login values (2,'2020-01-02 00:06:00');
insert into test_login values (2,'2020-01-07 00:06:00');
insert into test_login values (2,'2020-01-11 00:06:00');
insert into test_login values (2,'2020-01-12 00:06:00');
insert into test_login values (2,'2020-01-14 00:06:00');
insert into test_login values (2,'2020-01-18 00:06:00');
insert into test_login values (2,'2020-01-19 00:06:00');
insert into test_login values (2,'2020-01-22 00:06:00');

代碼

SELECT user_id,
       max(diff_days) + 1 max_diff_days
  from 
(
SELECT user_id,
       flag2,
       min(login_date) min_login_date,
       max(login_date) max_login_date,
       datediff(max(login_date),min(login_date)) diff_days
  from 
(
SELECT tmp3.user_id,
       tmp3.login_date,
       tmp3.flag1,
       sum(tmp4.flag1) flag2
  from 
(
SELECT user_id,
       login_date,
       case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
  from 
(
SELECT  user_id,
        login_date,
        (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from 
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
  from test_login 
 group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
) tmp3  -- 打標(biāo)記 如上下間隔<=3 則為1  否則為0
left join 
(
SELECT user_id,
       login_date,
       case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
  from 
(
SELECT  user_id,
        login_date,
        (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from 
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
  from test_login 
 group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
) tmp4  -- 功能同tmp3 
on  tmp3.user_id = tmp4.user_id
and tmp4.login_date <= tmp3.login_date
group by tmp3.user_id,tmp3.login_date,tmp3.flag1
) tmp5 -- 每一個(gè)用戶  每一個(gè)連續(xù)區(qū)間設(shè)置為不同的flag  標(biāo)記為flag2
group by user_id,flag2
) tmp6 -- 計(jì)算出每個(gè)連續(xù)區(qū)間 最大值和最小值  以及連續(xù)間隔天數(shù)
group by user_id

這樣看起來(lái)是不是覺(jué)得非常的復(fù)雜,下面我們拆解開(kāi)

2.1 同一天多次登陸去重

考慮同一天會(huì)有多次登陸的,此處用group by語(yǔ)句進(jìn)行去重,同一天只保留一條記錄

代碼:

SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
  from test_login 
 group by user_id,date_format(login_date,'%Y-%m-%d')

測(cè)試記錄:

mysql> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
    ->   from test_login 
    ->  group by user_id,date_format(login_date,'%Y-%m-%d');
+---------+------------+
| user_id | login_date |
+---------+------------+
|       1 | 2020-01-01 |
|       1 | 2020-01-02 |
|       1 | 2020-01-05 |
|       1 | 2020-01-07 |
|       1 | 2020-01-11 |
|       1 | 2020-01-12 |
|       1 | 2020-01-13 |
|       1 | 2020-01-14 |
|       1 | 2020-01-17 |
|       1 | 2020-01-18 |
|       1 | 2020-01-19 |
|       1 | 2020-01-22 |
|       2 | 2020-01-01 |
|       2 | 2020-01-02 |
|       2 | 2020-01-07 |
|       2 | 2020-01-11 |
|       2 | 2020-01-12 |
|       2 | 2020-01-14 |
|       2 | 2020-01-18 |
|       2 | 2020-01-19 |
|       2 | 2020-01-22 |
+---------+------------+
21 rows in set (0.00 sec)

mysql> 

2.2 求上一次的登陸日期

我們需要找到上一次登陸日期,與此次的登陸日期對(duì)比,才可以判斷是否符合小于等于3天的條件
此處,我們可以使用標(biāo)量子查詢來(lái)實(shí)現(xiàn)
需要記錄的每個(gè)用戶的第一條登陸信息上一天為null

代碼:

SELECT  user_id,
        login_date,
        (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from 
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
  from test_login 
 group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條

測(cè)試記錄:

mysql> SELECT  user_id,
    ->         login_date,
    ->         (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
    -> from 
    -> (
    -> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
    ->   from test_login 
    ->  group by user_id,date_format(login_date,'%Y-%m-%d')
    -> ) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
    -> ;
+---------+------------+---------------+
| user_id | login_date | up_login_date |
+---------+------------+---------------+
|       1 | 2020-01-01 | NULL          |
|       1 | 2020-01-02 | 2020-01-01    |
|       1 | 2020-01-05 | 2020-01-02    |
|       1 | 2020-01-07 | 2020-01-05    |
|       1 | 2020-01-11 | 2020-01-07    |
|       1 | 2020-01-12 | 2020-01-11    |
|       1 | 2020-01-13 | 2020-01-12    |
|       1 | 2020-01-14 | 2020-01-13    |
|       1 | 2020-01-17 | 2020-01-14    |
|       1 | 2020-01-18 | 2020-01-17    |
|       1 | 2020-01-19 | 2020-01-18    |
|       1 | 2020-01-22 | 2020-01-19    |
|       2 | 2020-01-01 | NULL          |
|       2 | 2020-01-02 | 2020-01-01    |
|       2 | 2020-01-07 | 2020-01-02    |
|       2 | 2020-01-11 | 2020-01-07    |
|       2 | 2020-01-12 | 2020-01-11    |
|       2 | 2020-01-14 | 2020-01-12    |
|       2 | 2020-01-18 | 2020-01-14    |
|       2 | 2020-01-19 | 2020-01-18    |
|       2 | 2020-01-22 | 2020-01-19    |
+---------+------------+---------------+
21 rows in set (0.00 sec)

2.3 打標(biāo)記判斷是否間隔天數(shù)<=3

求出上一個(gè)登陸日期,與本次登陸日期比較,如果<=3,則標(biāo)記為0,否則標(biāo)記為1
需要注意的是上一步的空值直接標(biāo)記為1
這樣通過(guò)flag1 我們就可以看到連續(xù)登陸的區(qū)間了

代碼:

SELECT user_id,
       login_date,
       case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
  from 
(
SELECT  user_id,
        login_date,
        (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from 
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
  from test_login 
 group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期

測(cè)試記錄:

mysql> SELECT user_id,
    ->        login_date,
    ->        case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
    ->   from 
    -> (
    -> SELECT  user_id,
    ->         login_date,
    ->         (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
    -> from 
    -> (
    -> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
    ->   from test_login 
    ->  group by user_id,date_format(login_date,'%Y-%m-%d')
    -> ) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
    -> ) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
    -> ;
+---------+------------+-------+
| user_id | login_date | flag1 |
+---------+------------+-------+
|       1 | 2020-01-01 |     1 |
|       1 | 2020-01-02 |     0 |
|       1 | 2020-01-05 |     0 |
|       1 | 2020-01-07 |     0 |
|       1 | 2020-01-11 |     1 |
|       1 | 2020-01-12 |     0 |
|       1 | 2020-01-13 |     0 |
|       1 | 2020-01-14 |     0 |
|       1 | 2020-01-17 |     0 |
|       1 | 2020-01-18 |     0 |
|       1 | 2020-01-19 |     0 |
|       1 | 2020-01-22 |     0 |
|       2 | 2020-01-01 |     1 |
|       2 | 2020-01-02 |     0 |
|       2 | 2020-01-07 |     1 |
|       2 | 2020-01-11 |     1 |
|       2 | 2020-01-12 |     0 |
|       2 | 2020-01-14 |     0 |
|       2 | 2020-01-18 |     1 |
|       2 | 2020-01-19 |     0 |
|       2 | 2020-01-22 |     0 |
+---------+------------+-------+
21 rows in set (0.01 sec)

2.4 打分組的標(biāo)記

因?yàn)橥粋€(gè)用戶存在多個(gè)連續(xù)登陸的區(qū)間,根據(jù)上一個(gè)步驟的flag1沒(méi)辦法區(qū)分開(kāi),此時(shí)需要區(qū)分開(kāi)同一個(gè)用戶的不同連續(xù)登陸區(qū)間。

因?yàn)镸ySQL版本不支持分析函數(shù),只能通過(guò)臨時(shí)表表連接的方式實(shí)現(xiàn),再考慮去除重復(fù),需要用到分組語(yǔ)句進(jìn)行去重。

其實(shí)flag2是這個(gè)解決方案核心所在,同一個(gè)用戶第一次連續(xù)登陸區(qū)間標(biāo)記為1,第二次則累加為2,以此類(lèi)推。

代碼:

SELECT tmp3.user_id,
       tmp3.login_date,
       tmp3.flag1,
       sum(tmp4.flag1) flag2
  from 
(
SELECT user_id,
       login_date,
       case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
  from 
(
SELECT  user_id,
        login_date,
        (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from 
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
  from test_login 
 group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
) tmp3  -- 打標(biāo)記 如上下間隔<=3 則為1  否則為0
left join 
(
SELECT user_id,
       login_date,
       case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
  from 
(
SELECT  user_id,
        login_date,
        (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from 
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
  from test_login 
 group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
) tmp4  -- 功能同tmp3 
on  tmp3.user_id = tmp4.user_id
and tmp4.login_date <= tmp3.login_date
group by tmp3.user_id,tmp3.login_date,tmp3.flag1

測(cè)試記錄:

mysql> SELECT tmp3.user_id,
    ->        tmp3.login_date,
    ->        tmp3.flag1,
    ->        sum(tmp4.flag1) flag2
    ->   from 
    -> (
    -> SELECT user_id,
    ->        login_date,
    ->        case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
    ->   from 
    -> (
    -> SELECT  user_id,
    ->         login_date,
    ->         (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
    -> from 
    -> (
    -> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
    ->   from test_login 
    ->  group by user_id,date_format(login_date,'%Y-%m-%d')
    -> ) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
    -> ) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
    -> ) tmp3  -- 打標(biāo)記 如上下間隔<=3 則為1  否則為0
    -> left join 
    -> (
    -> SELECT user_id,
    ->        login_date,
    ->        case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
    ->   from 
    -> (
    -> SELECT  user_id,
    ->         login_date,
    ->         (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
    -> from 
    -> (
    -> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
    ->   from test_login 
    ->  group by user_id,date_format(login_date,'%Y-%m-%d')
    -> ) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
    -> ) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
    -> ) tmp4  -- 功能同tmp3 
    -> on  tmp3.user_id = tmp4.user_id
    -> and tmp4.login_date <= tmp3.login_date
    -> group by tmp3.user_id,tmp3.login_date,tmp3.flag1;
+---------+------------+-------+-------+
| user_id | login_date | flag1 | flag2 |
+---------+------------+-------+-------+
|       1 | 2020-01-01 |     1 |     1 |
|       1 | 2020-01-02 |     0 |     1 |
|       1 | 2020-01-05 |     0 |     1 |
|       1 | 2020-01-07 |     0 |     1 |
|       1 | 2020-01-11 |     1 |     2 |
|       1 | 2020-01-12 |     0 |     2 |
|       1 | 2020-01-13 |     0 |     2 |
|       1 | 2020-01-14 |     0 |     2 |
|       1 | 2020-01-17 |     0 |     2 |
|       1 | 2020-01-18 |     0 |     2 |
|       1 | 2020-01-19 |     0 |     2 |
|       1 | 2020-01-22 |     0 |     2 |
|       2 | 2020-01-01 |     1 |     1 |
|       2 | 2020-01-02 |     0 |     1 |
|       2 | 2020-01-07 |     1 |     2 |
|       2 | 2020-01-11 |     1 |     3 |
|       2 | 2020-01-12 |     0 |     3 |
|       2 | 2020-01-14 |     0 |     3 |
|       2 | 2020-01-18 |     1 |     4 |
|       2 | 2020-01-19 |     0 |     4 |
|       2 | 2020-01-22 |     0 |     4 |
+---------+------------+-------+-------+
21 rows in set (0.01 sec)

2.5 分組求最大的間隔天數(shù)

其實(shí)有了上一個(gè)步驟的flag2,就可以判斷區(qū)分開(kāi)每一個(gè)用戶的每一個(gè)連續(xù)登陸區(qū)間,直接進(jìn)行分組就可以求出該連續(xù)區(qū)間 最大和最小的登陸日期,兩個(gè)日期差值就是間隔天數(shù)。最后根據(jù)user_id進(jìn)行分組,求最大的間隔天數(shù)就是最終結(jié)果。

需要審題 01-01到01-04 間隔是3天,但是算4天連續(xù)登陸 所以datediff函數(shù)的結(jié)果要+1

代碼:

SELECT user_id,
       max(diff_days) +1 max_diff_days
  from 
(
SELECT user_id,
       flag2,
       min(login_date) min_login_date,
       max(login_date) max_login_date,
       datediff(max(login_date),min(login_date)) diff_days
  from 
(
SELECT tmp3.user_id,
       tmp3.login_date,
       tmp3.flag1,
       sum(tmp4.flag1) flag2
  from 
(
SELECT user_id,
       login_date,
       case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
  from 
(
SELECT  user_id,
        login_date,
        (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from 
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
  from test_login 
 group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
) tmp3  -- 打標(biāo)記 如上下間隔<=3 則為1  否則為0
left join 
(
SELECT user_id,
       login_date,
       case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
  from 
(
SELECT  user_id,
        login_date,
        (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
from 
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
  from test_login 
 group by user_id,date_format(login_date,'%Y-%m-%d')
) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
) tmp4  -- 功能同tmp3 
on  tmp3.user_id = tmp4.user_id
and tmp4.login_date <= tmp3.login_date
group by tmp3.user_id,tmp3.login_date,tmp3.flag1
) tmp5 -- 每一個(gè)用戶  每一個(gè)連續(xù)區(qū)間設(shè)置為不同的flag  標(biāo)記為flag2
group by user_id,flag2
) tmp6 -- 計(jì)算出每個(gè)連續(xù)區(qū)間 最大值和最小值  以及連續(xù)間隔天數(shù)
group by user_id

測(cè)試記錄:

mysql> SELECT user_id,
    ->        flag2,
    ->        min(login_date) min_login_date,
    ->        max(login_date) max_login_date,
    ->        datediff(max(login_date),min(login_date)) diff_days
    ->   from 
    -> (
    -> SELECT tmp3.user_id,
    ->        tmp3.login_date,
    ->        tmp3.flag1,
    ->        sum(tmp4.flag1) flag2
    ->   from 
    -> (
    -> SELECT user_id,
    ->        login_date,
    ->        case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
    ->   from 
    -> (
    -> SELECT  user_id,
    ->         login_date,
    ->         (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
    -> from 
    -> (
    -> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
    ->   from test_login 
    ->  group by user_id,date_format(login_date,'%Y-%m-%d')
    -> ) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
    -> ) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
    -> ) tmp3  -- 打標(biāo)記 如上下間隔<=3 則為1  否則為0
    -> left join 
    -> (
    -> SELECT user_id,
    ->        login_date,
    ->        case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
    ->   from 
    -> (
    -> SELECT  user_id,
    ->         login_date,
    ->         (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
    -> from 
    -> (
    -> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
    ->   from test_login 
    ->  group by user_id,date_format(login_date,'%Y-%m-%d')
    -> ) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
    -> ) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
    -> ) tmp4  -- 功能同tmp3 
    -> on  tmp3.user_id = tmp4.user_id
    -> and tmp4.login_date <= tmp3.login_date
    -> group by tmp3.user_id,tmp3.login_date,tmp3.flag1
    -> ) tmp5 -- 每一個(gè)用戶  每一個(gè)連續(xù)區(qū)間設(shè)置為不同的flag  標(biāo)記為flag2
    -> group by user_id,flag2;
+---------+-------+----------------+----------------+-----------+
| user_id | flag2 | min_login_date | max_login_date | diff_days |
+---------+-------+----------------+----------------+-----------+
|       1 |     1 | 2020-01-01     | 2020-01-07     |         6 |
|       1 |     2 | 2020-01-11     | 2020-01-22     |        11 |
|       2 |     1 | 2020-01-01     | 2020-01-02     |         1 |
|       2 |     2 | 2020-01-07     | 2020-01-07     |         0 |
|       2 |     3 | 2020-01-11     | 2020-01-14     |         3 |
|       2 |     4 | 2020-01-18     | 2020-01-22     |         4 |
+---------+-------+----------------+----------------+-----------+
6 rows in set (0.01 sec)

mysql>


mysql> SELECT user_id,
    ->        max(diff_days) +1 max_diff_days
    ->   from 
    -> (
    -> SELECT user_id,
    ->        flag2,
    ->        min(login_date) min_login_date,
    ->        max(login_date) max_login_date,
    ->        datediff(max(login_date),min(login_date)) diff_days
    ->   from 
    -> (
    -> SELECT tmp3.user_id,
    ->        tmp3.login_date,
    ->        tmp3.flag1,
    ->        sum(tmp4.flag1) flag2
    ->   from 
    -> (
    -> SELECT user_id,
    ->        login_date,
    ->        case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
    ->   from 
    -> (
    -> SELECT  user_id,
    ->         login_date,
    ->         (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
    -> from 
    -> (
    -> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
    ->   from test_login 
    ->  group by user_id,date_format(login_date,'%Y-%m-%d')
    -> ) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
    -> ) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
    -> ) tmp3  -- 打標(biāo)記 如上下間隔<=3 則為1  否則為0
    -> left join 
    -> (
    -> SELECT user_id,
    ->        login_date,
    ->        case when up_login_date is null then 1 else case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1 end end as flag1
    ->   from 
    -> (
    -> SELECT  user_id,
    ->         login_date,
    ->         (select max(date_format(login_date,'%Y-%m-%d')) from test_login t2 where t2.user_id = tmp1.user_id and t2.login_date < tmp1.login_date ) as up_login_date
    -> from 
    -> (
    -> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
    ->   from test_login 
    ->  group by user_id,date_format(login_date,'%Y-%m-%d')
    -> ) tmp1  -- tmp1表 分組去重 一天多次登陸的算一條
    -> ) tmp2  -- 通過(guò)標(biāo)量子查詢實(shí)現(xiàn)上一天的登陸日期
    -> ) tmp4  -- 功能同tmp3 
    -> on  tmp3.user_id = tmp4.user_id
    -> and tmp4.login_date <= tmp3.login_date
    -> group by tmp3.user_id,tmp3.login_date,tmp3.flag1
    -> ) tmp5 -- 每一個(gè)用戶  每一個(gè)連續(xù)區(qū)間設(shè)置為不同的flag  標(biāo)記為flag2
    -> group by user_id,flag2
    -> ) tmp6 -- 計(jì)算出每個(gè)連續(xù)區(qū)間 最大值和最小值  以及連續(xù)間隔天數(shù)
    -> group by user_id
    -> ;
+---------+---------------+
| user_id | max_diff_days |
+---------+---------------+
|       1 |            12 |
|       2 |             5 |
+---------+---------------+
2 rows in set (0.00 sec)

mysql> 
 

三.MySQL 8.0寫(xiě)法

MySQL 8.0的with語(yǔ)句以及分析窗口函數(shù),可以使上面的解決方案的代碼簡(jiǎn)潔度大大提升

代碼:

with tmp1 AS
-- tmp1臨時(shí)表 一天多次登陸算一次
(
SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
  from test_login 
 group by user_id,date_format(login_date,'%Y-%m-%d')
),
tmp2 as
-- tmp2臨時(shí)表  通過(guò)lag獲取上一次登陸時(shí)間  如是第一條給默認(rèn)值
(
select user_id,
       login_date,
       lag(login_date,1,'1900-01-01') over(partition by user_id order by login_date) up_login_date
  from tmp1
),
tmp3 AS
-- 判斷是否符合3天內(nèi)標(biāo)準(zhǔn)  打標(biāo)記flag1
(
select  user_id,
        login_date,
        case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1  end as flag1
  from  tmp2
),
tmp4 AS
-- 通過(guò)分析函數(shù)將每個(gè)用戶 每一個(gè)連續(xù)登陸期間進(jìn)行標(biāo)記 flag2
(
select  user_id,
        login_date,
        flag1,
        sum(flag1) over(partition by user_id order by login_date) as flag2
  from tmp3
),
tmp5 AS
-- 通過(guò)user_id flag2進(jìn)行分組
(
SELECT user_id,
       flag2,
       min(login_date) min_login_date,
       max(login_date) max_login_date,
       datediff(max(login_date),min(login_date)) diff_days
  from tmp4
 group by user_id,flag2
)
SELECT user_id,
       max(diff_days) + 1 as diff_days
  from tmp5
 group by user_id;

測(cè)試記錄:

mysql> select version();
+-----------+
| version() |
+-----------+
| 8.0.19    |
+-----------+
1 row in set (0.00 sec)

mysql> with tmp1 AS
    -> -- tmp1臨時(shí)表 一天多次登陸算一次
    -> (
    -> SELECT user_id,date_format(login_date,'%Y-%m-%d') login_date
    ->   from test_login
    ->  group by user_id,date_format(login_date,'%Y-%m-%d')
    -> ),
    -> tmp2 as
    -> -- tmp2臨時(shí)表  通過(guò)lag獲取上一次登陸時(shí)間  如是第一條給默認(rèn)值
    -> (
    -> select user_id,
    ->        login_date,
    ->        lag(login_date,1,'1900-01-01') over(partition by user_id order by login_date) up_login_d
    ->   from tmp1
    -> ),
    -> tmp3 AS
    -> -- 判斷是否符合3天內(nèi)標(biāo)準(zhǔn)  打標(biāo)記flag1
    -> (
    -> select  user_id,
    ->         login_date,
    ->         case when DATEDIFF(login_date,up_login_date) <= 3 then 0 else 1  end as flag1
    ->   from  tmp2
    -> ),
    -> tmp4 AS
    -> -- 通過(guò)分析函數(shù)將每個(gè)用戶 每一個(gè)連續(xù)登陸期間進(jìn)行標(biāo)記 flag2
    -> (
    -> select  user_id,
    ->         login_date,
    ->         flag1,
    ->         sum(flag1) over(partition by user_id order by login_date) as flag2
    ->   from tmp3
    -> ),
    -> tmp5 AS
    -> -- 通過(guò)user_id flag2進(jìn)行分組
    -> (
    -> SELECT user_id,
    ->        flag2,
    ->        min(login_date) min_login_date,
    ->        max(login_date) max_login_date,
    ->        datediff(max(login_date),min(login_date)) diff_days
    ->   from tmp4
    ->  group by user_id,flag2
    -> )
    -> SELECT user_id,
    ->        max(diff_days) + 1 as diff_days
    ->   from tmp5
    ->  group by user_id;
+---------+-----------+
| user_id | diff_days |
+---------+-----------+
|       1 |        12 |
|       2 |         5 |
+---------+-----------+
2 rows in set (0.00 sec)

最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時(shí)請(qǐng)結(jié)合常識(shí)與多方信息審慎甄別。
平臺(tái)聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點(diǎn),簡(jiǎn)書(shū)系信息發(fā)布平臺(tái),僅提供信息存儲(chǔ)服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容