- 使用beeline連接到hive
beeline -u "jdbc:hive2://localhost:10000" ' ' ' '
(URL 后面兩個(gè)引號中的空格分別是用戶名和密碼的占位符, 請按實(shí)際環(huán)境替換)
- 創(chuàng)建users表和train表,為了方便, 創(chuàng)建表的hql語句單獨(dú)寫在一個(gè)腳本文件中,我的是
ctable.hql
-- External table over the users CSV files already sitting in HDFS;
-- the table definition only describes the file layout, it does not move data.
-- NOTE(review): `birthdyear` looks like a typo for `birthyear`, but it matches
-- the sample query output below, so it is kept as-is.
CREATE EXTERNAL TABLE IF NOT EXISTS users(
    user_id    BIGINT,
    locale     STRING,
    birthdyear INT,
    gender     STRING,
    joinedAt   STRING,
    location   STRING,
    timezone   STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/temp/data/usersdata'
TBLPROPERTIES("skip.header.line.count"="1");
-- External table over the train CSV files in HDFS.
-- `timestamp` is wrapped in backticks because it clashes with a Hive keyword
-- (see the ParseException note further down).
CREATE EXTERNAL TABLE IF NOT EXISTS train(
    user_id        BIGINT,
    event_id       BIGINT,
    invited        INT,
    `timestamp`    STRING,
    interested     INT,
    not_interested INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/temp/data/traindata'
TBLPROPERTIES("skip.header.line.count"="1");
說明:
一張hive表在物理上對應(yīng)一個(gè)文件夾。拿第一張表users來說,它指向HDFS上的
/temp/data/usersdata文件夾,usersdata文件夾中存放數(shù)據(jù),這些數(shù)據(jù)是以文件形式存放的,hive表的定義就是對這些文件格式的一種描述。下面對users表的后面幾行做一些說明。
| 語句 | 意思 |
|---|---|
| row format delimited | 文件以行的格式分隔 |
| fields terminated by ',' | 每個(gè)字段之間以逗號分隔 |
| stored as textfile | 文件的儲(chǔ)存格式為textfile |
| location '/temp/data/usersdata' | 這張表在HDFS中所指向的文件夾(usersdata) |
| tblproperties("skip.header.line.count"="1") | 跳過文件中的第一行數(shù)據(jù) |
創(chuàng)建比較復(fù)雜的表:
-- External table demonstrating Hive complex column types:
-- array, struct and map (including a map whose values are arrays).
-- Three delimiters are declared: '|' between fields, ',' between
-- collection items, and ':' between a map key and its value.
CREATE EXTERNAL TABLE IF NOT EXISTS emp(
    name         STRING,
    work_place   ARRAY<STRING>,
    sex_age      STRUCT<sex:STRING, age:INT>,
    skills_score MAP<STRING, INT>,
    depart_title MAP<STRING, ARRAY<STRING>>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':'
STORED AS TEXTFILE
LOCATION '/temp/data/emp';
注:上面的ctable.hql文件在執(zhí)行時(shí)如果出現(xiàn)FAILED: ParseException line 7:1 Failed to recognize predicate 'timestamp'. Failed rule: 'identifier' in column specification類似的錯(cuò)誤,說明表字段名與Hive關(guān)鍵字產(chǎn)生了沖突,解決方法是把字段名改一下,或者給字段名加上反引號``。
- 在hive shell 環(huán)境中執(zhí)行剛才的腳本
ctable.hql,也可以在本地執(zhí)行
// hive shell
source /root/ctable.hql;
// 本地
hive -f "ctable.hql"
注: 在beeline中執(zhí)行腳本會(huì)報(bào)錯(cuò)
- 創(chuàng)建完成后向里面導(dǎo)入準(zhǔn)備好的數(shù)據(jù)
-- Copy the local CSV files into the tables' HDFS locations
-- (run from the hive CLI; per the note below, beeline rejects LOCAL paths here).
LOAD DATA LOCAL INPATH '/root/data/users.csv' INTO TABLE users;
LOAD DATA LOCAL INPATH '/root/data/train.csv' INTO TABLE train;
注:我是在hive命令行中導(dǎo)入的, 在beeline中執(zhí)行上面的語句會(huì)報(bào)錯(cuò)
- 看一下結(jié)果
select * from users limit 5;
+----------------+---------------+-------------------+---------------+---------------------------+---------------------+-----------------+--+
| users.user_id | users.locale | users.birthdyear | users.gender | users.joinedat | users.location | users.timezone |
+----------------+---------------+-------------------+---------------+---------------------------+---------------------+-----------------+--+
| 3197468391 | id_ID | 1993 | male | 2012-10-02T06:40:55.524Z | Medan Indonesia | 480 |
| 3537982273 | id_ID | 1992 | male | 2012-09-29T18:03:12.111Z | Medan Indonesia | 420 |
| 823183725 | en_US | 1975 | male | 2012-10-06T03:14:07.149Z | Stratford Ontario | -240 |
| 1872223848 | en_US | 1991 | female | 2012-11-04T08:59:43.783Z | Tehran Iran | 210 |
| 3429017717 | id_ID | 1995 | female | 2012-09-10T16:06:53.132Z | | 420 |
+----------------+---------------+-------------------+---------------+---------------------------+---------------------+-----------------+--+
select * from train limit 5;
+----------------+-----------------+----------------+-----------------------------------+-------------------+-----------------------+--+
| train.user_id | train.event_id | train.invited | train.timestamp | train.interested | train.not_interested |
+----------------+-----------------+----------------+-----------------------------------+-------------------+-----------------------+--+
| 3044012 | 1918771225 | 0 | 2012-10-02 15:53:05.754000+00:00 | 0 | 0 |
| 3044012 | 1502284248 | 0 | 2012-10-02 15:53:05.754000+00:00 | 0 | 0 |
| 3044012 | 2529072432 | 0 | 2012-10-02 15:53:05.754000+00:00 | 1 | 0 |
| 3044012 | 3072478280 | 0 | 2012-10-02 15:53:05.754000+00:00 | 0 | 0 |
| 3044012 | 1390707377 | 0 | 2012-10-02 15:53:05.754000+00:00 | 0 | 0 |
+----------------+-----------------+----------------+-----------------------------------+-------------------+-----------------------+--+
.