HIVE 窗口分析函數(shù)-SUM、AVG、MIN、MAX

數(shù)據(jù)準備

CREATE EXTERNAL TABLE lxw1234 (
cookieid string,
createtime string,   --day 
pv INT
) ROW FORMAT DELIMITED 
FIELDS TERMINATED BY ',' 
stored as textfile location '/tmp/lxw11/';
 
DESC lxw1234;
cookieid                STRING 
createtime              STRING 
pv INT 
 
hive> select * from lxw1234;
OK
cookie1 2015-04-10      1
cookie1 2015-04-11      5
cookie1 2015-04-12      7
cookie1 2015-04-13      3
cookie1 2015-04-14      2
cookie1 2015-04-15      4
cookie1 2015-04-16      4

SUM — 注意,結(jié)果和ORDER BY相關(guān),默認為升序

SELECT cookieid,
createtime,
pv,
SUM(pv) OVER(PARTITION BY cookieid ORDER BY createtime) AS pv1, -- 默認為從起點到當前行
SUM(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS pv2, --從起點到當前行,結(jié)果同pv1 
SUM(pv) OVER(PARTITION BY cookieid) AS pv3,                             --分組內(nèi)所有行
SUM(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS pv4,   --當前行+往前3行
SUM(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND 1 FOLLOWING) AS pv5,    --當前行+往前3行+往后1行
SUM(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS pv6   ---當前行+往后所有行  
FROM lxw1234;
 
cookieid createtime     pv      pv1     pv2     pv3     pv4     pv5      pv6 
-----------------------------------------------------------------------------
cookie1  2015-04-10      1       1       1       26      1       6       26
cookie1  2015-04-11      5       6       6       26      6       13      25
cookie1  2015-04-12      7       13      13      26      13      16      20
cookie1  2015-04-13      3       16      16      26      16      18      13
cookie1  2015-04-14      2       18      18      26      17      21      10
cookie1  2015-04-15      4       22      22      26      16      20      8
cookie1  2015-04-16      4       26      26      26      13      13      4

pv1: 分組內(nèi)從起點到當前行的pv累積,如,11號的pv1=10號的pv+11號的pv, 12號=10號+11號+12號
pv2: 同pv1
pv3: 分組內(nèi)(cookie1)所有的pv累加
pv4: 分組內(nèi)當前行+往前3行,如,11號=10號+11號, 12號=10號+11號+12號, 13號=10號+11號+12號+13號, 14號=11號+12號+13號+14號
pv5: 分組內(nèi)當前行+往前3行+往后1行,如,14號=11號+12號+13號+14號+15號=5+7+3+2+4=21
pv6: 分組內(nèi)當前行+往后所有行,如,13號=13號+14號+15號+16號=3+2+4+4=13,14號=14號+15號+16號=2+4+4=10

如果不指定ROWS BETWEEN,默認為從起點到當前行;
如果不指定ORDER BY,則將分組內(nèi)所有值累加;
關(guān)鍵是理解ROWS BETWEEN含義,也叫做WINDOW子句:
PRECEDING:往前
FOLLOWING:往后
CURRENT ROW:當前行
UNBOUNDED:起點,UNBOUNDED PRECEDING 表示從前面的起點, UNBOUNDED FOLLOWING表示到后面的終點


AVG,MINMAX,和SUM用法一樣

-- AVG
SELECT cookieid,
createtime,
pv,
AVG(pv) OVER(PARTITION BY cookieid ORDER BY createtime) AS pv1, -- 默認為從起點到當前行
AVG(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS pv2, --從起點到當前行,結(jié)果同pv1 
AVG(pv) OVER(PARTITION BY cookieid) AS pv3,                             --分組內(nèi)所有行
AVG(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS pv4,   --當前行+往前3行
AVG(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND 1 FOLLOWING) AS pv5,    --當前行+往前3行+往后1行
AVG(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS pv6   ---當前行+往后所有行  
FROM lxw1234; 
cookieid createtime     pv      pv1     pv2     pv3     pv4     pv5      pv6 
-----------------------------------------------------------------------------
cookie1 2015-04-10      1       1.0     1.0     3.7142857142857144      1.0     3.0     3.7142857142857144
cookie1 2015-04-11      5       3.0     3.0     3.7142857142857144      3.0     4.333333333333333       4.166666666666667
cookie1 2015-04-12      7       4.333333333333333       4.333333333333333       3.7142857142857144      4.333333333333333       4.0     4.0
cookie1 2015-04-13      3       4.0     4.0     3.7142857142857144      4.0     3.6     3.25
cookie1 2015-04-14      2       3.6     3.6     3.7142857142857144      4.25    4.2     3.3333333333333335
cookie1 2015-04-15      4       3.6666666666666665      3.6666666666666665      3.7142857142857144      4.0     4.0     4.0
cookie1 2015-04-16      4       3.7142857142857144      3.7142857142857144      3.7142857142857144      3.25    3.25    4.0
-- MIN
SELECT cookieid,
createtime,
pv,
MIN(pv) OVER(PARTITION BY cookieid ORDER BY createtime) AS pv1, -- 默認為從起點到當前行
MIN(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS pv2, --從起點到當前行,結(jié)果同pv1 
MIN(pv) OVER(PARTITION BY cookieid) AS pv3,                                                                                                                             --分組內(nèi)所有行
MIN(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS pv4,   --當前行+往前3行
MIN(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND 1 FOLLOWING) AS pv5,    --當前行+往前3行+往后1行
MIN(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS pv6   ---當前行+往后所有行  
FROM lxw1234;
 
cookieid createtime     pv      pv1     pv2     pv3     pv4     pv5      pv6 
-----------------------------------------------------------------------------
cookie1 2015-04-10      1       1       1       1       1       1       1
cookie1 2015-04-11      5       1       1       1       1       1       2
cookie1 2015-04-12      7       1       1       1       1       1       2
cookie1 2015-04-13      3       1       1       1       1       1       2
cookie1 2015-04-14      2       1       1       1       2       2       2
cookie1 2015-04-15      4       1       1       1       2       2       4
cookie1 2015-04-16      4       1       1       1       2       2       4
-- MAX
SELECT cookieid,
createtime,
pv,
MAX(pv) OVER(PARTITION BY cookieid ORDER BY createtime) AS pv1, -- 默認為從起點到當前行
MAX(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS pv2, --從起點到當前行,結(jié)果同pv1 
MAX(pv) OVER(PARTITION BY cookieid) AS pv3,                             --分組內(nèi)所有行
MAX(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND CURRENT ROW) AS pv4,   --當前行+往前3行
MAX(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN 3 PRECEDING AND 1 FOLLOWING) AS pv5,    --當前行+往前3行+往后1行
MAX(pv) OVER(PARTITION BY cookieid ORDER BY createtime ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS pv6   ---當前行+往后所有行  
FROM lxw1234;
 
cookieid createtime     pv      pv1     pv2     pv3     pv4     pv5      pv6 
-----------------------------------------------------------------------------
cookie1 2015-04-10      1       1       1       7       1       5       7
cookie1 2015-04-11      5       5       5       7       5       7       7
cookie1 2015-04-12      7       7       7       7       7       7       7
cookie1 2015-04-13      3       7       7       7       7       7       4
cookie1 2015-04-14      2       7       7       7       7       7       4
cookie1 2015-04-15      4       7       7       7       7       7       4
cookie1 2015-04-16      4       7       7       7       4       4       4
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
【社區(qū)內(nèi)容提示】社區(qū)部分內(nèi)容疑似由AI輔助生成,瀏覽時請結(jié)合常識與多方信息審慎甄別。
平臺聲明:文章內(nèi)容(如有圖片或視頻亦包括在內(nèi))由作者上傳并發(fā)布,文章內(nèi)容僅代表作者本人觀點,簡書系信息發(fā)布平臺,僅提供信息存儲服務(wù)。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容