mysql數(shù)據(jù)分析項目之2:淘寶電商用戶行為分析(AARRR轉(zhuǎn)化漏斗與RFM模型)

本項目基于淘寶用戶行為數(shù)據(jù),探索用戶行為規(guī)律,尋找高價值用戶,具體指標(biāo)包括:日PV和日UV分析,支付率分析,復(fù)購行為分析,漏斗轉(zhuǎn)化率分析和用戶價值RFM分析。

分析步驟

在MySQL關(guān)系型數(shù)據(jù)庫,利用SQL對淘寶用戶行為進行數(shù)據(jù)分析。分析步驟如下:
提出問題
數(shù)據(jù)準(zhǔn)備
數(shù)據(jù)預(yù)處理
建模分析

提出問題

本次分析的目的是想通過對淘寶用戶行為數(shù)據(jù)分析,為以下問題提供解釋和改進建議:
1.基于AARRR模型的電商分析指標(biāo),確定各個環(huán)節(jié)的轉(zhuǎn)換率,找到需要改進的環(huán)節(jié)
2.時間維度分析:研究用戶在不同時間維度下的行為規(guī)律
3.商品分析:研究用戶對不同種類商品的偏好,找到針對不同商品的營銷策略
4.基于RFM模型找出核心付費用戶群,對這部分用戶進行精準(zhǔn)營銷

數(shù)據(jù)準(zhǔn)備

數(shù)據(jù)來源:略
利用Python導(dǎo)入數(shù)據(jù),至mysql(略)
查看數(shù)據(jù)、查看列字段屬性

SELECT * FROM data.tianchi_mobile_recommend_train_user;

SELECT count(1) FROM data.tianchi_mobile_recommend_train_user;  -- 1048575

desc tianchi_mobile_recommend_train_user;
1.PNG

理解數(shù)據(jù):數(shù)據(jù)集導(dǎo)入1048575條數(shù)據(jù);包括6個字段,分別是

  • user_id:用戶ID,脫敏
  • item_id:商品ID,脫敏
  • behavior_type:用戶行為類型,取值1,2,3,4,分別表示用戶的點擊,收藏,加入購物車,購買四種行為
  • user_geohash:地理位置
  • item_category:品類ID,商品所屬的品類
  • time:用戶行為發(fā)生的時間
2.PNG

數(shù)據(jù)預(yù)處理

數(shù)據(jù)類型處理:增加新列date_time、dates和hours,便于后續(xù)時間維度分析;刪列

set sql_safe_updates=0;
-- 增加新列date_time、dates和hours
alter table tianchi_mobile_recommend_train_user add column date_time datetime null;
update tianchi_mobile_recommend_train_user
set date_time =str_to_date(time,'%Y-%m-%d %H') 
-- %H可以表示0-23;而%h表示0-12.注意空格導(dǎo)致的報錯

alter table tianchi_mobile_recommend_train_user add column dates char(10) null;
update tianchi_mobile_recommend_train_user 
set dates=date(date_time);

alter table data.tianchi_mobile_recommend_train_user add column hours char(10) null;
update data.tianchi_mobile_recommend_train_user
set hours=time (date_time);

缺失值處理:因缺失值過多,刪去地理信息user_geohash;刪去time列

-- 因缺失值過多,刪去地理信息user_geohash;刪去time列
alter table data.tianchi_mobile_recommend_train_user drop column user_geohash;
alter table data.tianchi_mobile_recommend_train_user drop column time;

SELECT * FROM data.tianchi_mobile_recommend_train_user;
3.PNG

重復(fù)值處理:創(chuàng)建新表a,并插入前50000條無重復(fù)數(shù)據(jù)

create table a insert into a select distinct * from XXX #創(chuàng)建新表a,并將重復(fù)值去除

-- 創(chuàng)建新表a,并插入前50000條數(shù)據(jù)?!緇imit 50000不需要加括號??!】
create table a like data.tianchi_mobile_recommend_train_user;
insert into a select distinct * from data.tianchi_mobile_recommend_train_user limit 50000;

SELECT count(1) FROM data.a;
4.時間跨度:從2014-11-18到2014-12-18

建模分析

基于AARRR模型分析用戶購物情況

-- aquisition用戶獲?。骸救招略鲇脩魯?shù)分析:詳見下文day_0】

-- activation用戶激活:【流量分析:從整體、局部(每日)分別分析】

-- 整體了解:uv、pv以及pv/uv(人均頁面訪問數(shù))各是多少
select count(distinct user_id) as 'UV',
(select count(*) from data.a where behavior_type ='1') as 'PV',
(select count(*) from data.a where behavior_type ='1')/count(distinct user_id) as 'PV/UV'
from data.a;
5.PNG
-- 每日了解:日PV、日uv、人均頁面訪問數(shù)是多少?
select b1.dates,b1.日uv,b2.日pv,b2.日pv /b1.日uv as'每天的人均頁面訪問數(shù)' from
(select dates,count(distinct user_id) as '日uv'
from a
group by dates) b1
left join (
select dates,count(user_id) as'日pv'
from a
where behavior_type ='1'
group by dates) b2
on b1.dates=b2.dates
6.PNG
-- 每日的分析(1-4,分別表示點擊pv、購買buy、加購物車cart、喜歡fav)
select dates,count(1) as '每日的總數(shù)',
sum(case when behavior_type=1 then 1 else 0 end) as'pv',
sum(case when behavior_type=2 then 1 else 0 end) as'buy',
sum(case when behavior_type=3 then 1 else 0 end) as'cart',
sum(case when behavior_type=4 then 1 else 0 end) as'fav'
from data.a
group by dates
7.PNG
-- 每時刻的分析
select hours,count(1) as '該時刻的總數(shù)',
sum(case when behavior_type=1 then 1 else 0 end) as'pv',
sum(case when behavior_type=2 then 1 else 0 end) as'buy',
sum(case when behavior_type=3 then 1 else 0 end) as'cart',
sum(case when behavior_type=4 then 1 else 0 end) as'fav'
from a 
group by hours
8.PNG
-- # 跳失率計算:只有點擊行為的用戶/總用戶數(shù)
select count(distinct user_id) as '只有點擊行為的用戶'
,concat(cast(count(distinct user_id) /410*100 as DECIMAL(18,2)),'%') as '跳失率'
from data.a
where user_id not in(select distinct user_id from data.a where behavior_type ='2')
and user_id not in(select distinct user_id from data.a where behavior_type ='3')
and user_id not in(select distinct user_id from data.a where behavior_type ='4')

-- 結(jié)果顯示只有點擊1pv行為沒有收藏4fav、加購物車3cart以及購買2buy行為的總用戶數(shù)是71,除以總用戶數(shù)UV 410為17%
-- 跳出率為17%
9.concat(cast(count(distinct user_id) /410*100 as DECIMAL(18,2)),'%') as '跳失率'

-- retention用戶留存:【留存分析】

-- 獲取每個用戶的使用時間與第一次使用時間
create view v0
as
select a0.user_id , a0.dates ,a1.firstday
from 
(select user_id , dates  from a
group by user_id , dates) as a0
inner join (
select user_id , min(dates) as firstday from a
group by user_id) as a1
on a0.user_id = a1.user_id
order by a0.user_id , a0.dates
-- 第一次使用時間和使用時間的間隔by_day
create view v1
as 
select user_id ,dates,firstday,datediff(dates,firstday) as by_day
from v0
-- 提取字段作為列名,計算留存日的用戶數(shù)量
create view v2
as 
select firstday, 
sum(case when by_day=0 then 1 else 0 end) as 'day_0',
sum(case when by_day=1 then 1 else 0 end) as 'day_1',sum(case when by_day=2 then 1 else 0 end) as 'day_2',
sum(case when by_day=3 then 1 else 0 end) as 'day_3',sum(case when by_day=4 then 1 else 0 end) as 'day_4',
sum(case when by_day=5 then 1 else 0 end) as 'day_5',sum(case when by_day=6 then 1 else 0 end) as 'day_6',
sum(case when by_day=7 then 1 else 0 end) as 'day_7',sum(case when by_day=15 then 1 else 0 end) as 'day_15',
sum(case when by_day=30 then 1 else 0 end) as 'day_30'
from v1
group by firstday 
order by firstday;
  • day_0即當(dāng)天新增人數(shù)


    9【時間跨度:從2014-11-18到2014-12-18(day_0即當(dāng)天新增人數(shù))】.PNG
-- 留存率
select firstday, day_0,
concat(cast((day_1/day_0)*100 as DECIMAL(18,2)),'%') as 'day_1%',
concat(cast((day_2/day_0)*100 as DECIMAL(18,2)),'%') as 'day_2%',
concat(cast((day_3/day_0)*100 as DECIMAL(18,2)),'%') as 'day_3%',
concat(cast((day_4/day_0)*100 as DECIMAL(18,2)),'%') as 'day_4%',
concat(cast((day_5/day_0)*100 as DECIMAL(18,2)),'%') as 'day_5%',
concat(cast((day_6/day_0)*100 as DECIMAL(18,2)),'%') as 'day_6%',
concat(cast((day_7/day_0)*100 as DECIMAL(18,2)),'%') as 'day_7%',
concat(cast((day_15/day_0)*100 as DECIMAL(18,2)),'%') as 'day_15%',
concat(cast((day_30/day_0)*100 as DECIMAL(18,2)),'%') as 'day_30%'
from v2
10.PNG

-- revenue用戶收益

-- 【復(fù)購分析】復(fù)購率是多少?
DROP VIEW IF EXISTS f;
create view f
as
select user_id,count(1) as '購買次數(shù)' 
from a
where behavior_type='2'
group by user_id
having count(behavior_type) >=2  -- 此處使用【'購買次數(shù)'>=2】會報錯;正確方式:購買次數(shù)>=2(不需要加‘’)
order by 購買次數(shù) desc
11.PNG
-- 有購買行為的消費用戶數(shù)
select count(distinct user_id) 
from a
where behavior_type='2'

-- 計算復(fù)購率【復(fù)購率為70%】
select (select count(1) from f)/(select count(distinct user_id) from a where behavior_type='2') as '復(fù)購率'
from a  limit 1;  -- 此處要加limit 1 
12.PNG
-- 復(fù)購頻數(shù)分類
select 購買次數(shù),count(user_id)  as 人數(shù)
from f
group by 購買次數(shù)  -- 【購買次數(shù)】不需要加''
order by 購買次數(shù) desc
13.PNG

-- refer用戶推薦:【轉(zhuǎn)化率分析 + 漏斗分析】購物車cart-3轉(zhuǎn)化率、收藏fav-4轉(zhuǎn)化率、購買buy-2轉(zhuǎn)化率各是多少?

轉(zhuǎn)化類型函數(shù):CAST (expression AS data_type)
ROUND(expression,2)

select concat(cast(
(select count(1) from a where behavior_type='3')/
(select count(1) from a where behavior_type='1')*100 as Decimal(18,2)),'%') as '購物車轉(zhuǎn)化率',
concat(cast(
(select count(1) from a where behavior_type='4')/
(select count(1) from a where behavior_type='1')*100 as Decimal(18,2)),'%') as '收藏轉(zhuǎn)化率',
concat(cast(
(select count(1) from a where behavior_type='2')/
(select count(1) from a where behavior_type='1')*100 as Decimal(18,2)),'%') as 購買轉(zhuǎn)化率
from a limit 1 ;
14.PNG

最終得到【購物車轉(zhuǎn)化率, 收藏轉(zhuǎn)化率, 購買轉(zhuǎn)化率分別為2.74%, 0.90%, 2.30%】。

用戶行為的漏斗:以行為發(fā)生與否為研究對象,有此行為即+1;
UV(獨立訪客)轉(zhuǎn)化的漏斗:以每個用戶為研究對象。

-- 用戶行為的漏斗計算:
select behavior_type,count(user_id)
from a 
group by behavior_type

-- UV(獨立訪客)轉(zhuǎn)化的漏斗計算:
select behavior_type,count(distinct user_id) AS DIS_user
from a 
group by behavior_type
15-1.PNG

15-2.PNG

-- 商品分析:統(tǒng)計出受歡迎的產(chǎn)品item_id以及類目item_categary,優(yōu)化產(chǎn)品銷售。

-- 按產(chǎn)品item_id分析
select product_buytimes,count(*) as product_count from(
select item_id,count(user_id) as product_buytimes
from a 
where behavior_type='2'
group by item_id) as  product_buypool
group by product_buytimes
order by product_buytimes desc;
16.PNG
-- 類目item_categary分析
select cat_buytimes,count(*) as cat_count from(
select item_category,count(user_id) as cat_buytimes
from a 
where behavior_type='2'
group by item_category) as  cat_buypool
group by cat_buytimes
order by cat_buytimes desc;
17.PNG

基于RFM模型進行用戶分類

-- R等級分類
-- 1.建立r視圖,將近期購買時間提取到R臨時表中
create view r
as
select user_id ,max(dates) as '近期購買時間' 
from a 
where behavior_type='2'
group by user_id

select max(dates)
from a -- 2014-12-18

-- 2.建立R等級劃分視圖:將客戶近期購買時間進行等級劃分,越接近2017-12-04號R越大;
-- 構(gòu)建R數(shù)值型(用于后續(xù)的計算全體用戶的平均值)和'R'字符串型(用于計算每個等級的數(shù)量)
create view R等級劃分
as
select user_id,近期購買時間,datediff('2014-12-18',近期購買時間)as 距今天數(shù),
(case 
when datediff('2014-12-18',近期購買時間)<=2 then 5
when datediff('2014-12-18',近期購買時間)<=4 then 4
when datediff('2014-12-18',近期購買時間)<=6 then 3
when datediff('2014-12-18',近期購買時間)<=8 then 2
else 1 end) as R,
(case 
when datediff('2014-12-18',近期購買時間)<=2 then '5'
when datediff('2014-12-18',近期購買時間)<=4 then '4'
when datediff('2014-12-18',近期購買時間)<=6 then '3'
when datediff('2014-12-18',近期購買時間)<=8 then '2'
else '1' end) as R值
from r
18.PNG
-- F等級劃分
-- 1.建立f視圖
create view f 
as 
select user_id ,count(user_id) as '購買次數(shù)' 
from a 
where behavior_type='2'
group by user_id

-- 建立F等級劃分
create view F等級劃分
as
select user_id,購買次數(shù),
(case when 購買次數(shù)<=2 then 1
when 購買次數(shù)<=4 then 2
when 購買次數(shù)<=6 then 3
when 購買次數(shù)<=8 then 4
else 5 end) as 'F',
(case when 購買次數(shù)<=2 then '1'
when 購買次數(shù)<=4 then '2'
when 購買次數(shù)<=6 then '3'
when 購買次數(shù)<=8 then '4'
else '5' end) as 'F值'
from f 
19.PNG
-- 建立RFM模型
-- 1.R平均值(因為不區(qū)分大小寫:【R等級劃分】=【r等級劃分】)
SELECT avg(R) as 'R平均值' FROM R等級劃分;   -- 2.8462

-- 2.F平均值
select avg(F) as 'F平均值' from  F等級劃分;    -- 3.2353

-- 3.用戶八大類等級劃分,由于該數(shù)據(jù)沒有M值,故只建立了4個分類
create view RFM匯總
as
select a.*,b.F,b.F值,
(case
when a.R>2.8462 and b.F>3.2353 then '重要高價值客戶'
when a.R<2.8462 and b.F>3.2353 then '重要喚回客戶'
when a.R>2.8462 and b.F<3.2353 then '重要深耕客戶'
when a.R<2.8462 and b.F<3.2353 then '重要挽留客戶'
END
) as 客戶分類
from R等級劃分 a, F等級劃分 b
where a.user_id=b.user_id
20.PNG
select 客戶分類,count(1) from RFM匯總
group by 客戶分類
21.PNG

結(jié)論:

1.基于AARRR模型:跳出率為17%;復(fù)購率70%;購物車轉(zhuǎn)化率, 收藏轉(zhuǎn)化率, 購買轉(zhuǎn)化率分別為2.74%, 0.90%, 2.30%。針對收藏商品但還沒購買的用戶,可通過短信、APP彈窗push等方式提醒用戶去購物車去完成支付,進一步提高購買率。

2.時間維度分析:用戶活躍時間集中在19點到23點(峰值在21點),可在該活躍時間推送新品、商家折扣優(yōu)惠及促銷活動,提高購買率。

3.商品分析:對于熱銷的商品,可以提高曝光率以及多推出一些和該商品相關(guān)的其他商品捆綁 / 交叉銷售,提高商品的銷量。多種方式進行客戶關(guān)系維護:購買升級、交叉銷售、追加銷售等。

4.基于RFM模型:通過RFM模型對用戶進行劃分等級,對每個用戶進行精準(zhǔn)化營銷;同時可以對R和F的數(shù)據(jù)進行監(jiān)控,推測客戶消費的異常情況,挽回流失客戶。

最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時請結(jié)合常識與多方信息審慎甄別。
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點,簡書系信息發(fā)布平臺,僅提供信息存儲服務(wù)。

友情鏈接更多精彩內(nèi)容