- 下載 Dinky 1.2.1 版本(對應 Flink 1.17)到 EC2 並解壓,本文假設 Dinky 安裝目錄為 /opt/dinky/
cd /opt/
sudo wget https://github.com/DataLinkDC/dinky/releases/download/v1.2.1/dinky-release-1.17-1.2.1.tar.gz
sudo tar -xvf dinky-release-1.17-1.2.1.tar.gz
sudo mv dinky-release-1.17-1.2.1 dinky
dinky-release-1.17-1.2.1.tar.gz
注意版本對應關係:上述文件名中的 1.17(原文以紅色標示)為 Dinky 支持的 Flink 版本,1.2.1(原文以棕色標示)為 Dinky 版本
- 安裝 JDK 並配置 JAVA_HOME 環境變量
- 複製 EMR 配置文件到 Dinky 服務器
sudo mkdir -p /etc/flink/conf
sudo scp -r hadoop@<EMR Master IP>:/etc/alternatives/flink-conf/* /etc/flink/conf/
sudo mkdir -p /etc/hadoop/conf
sudo scp -r hadoop@<EMR Master IP>:/etc/alternatives/hadoop-conf/* /etc/hadoop/conf/
- 準備 MySQL JDBC 驅動 jar 包
sudo wget https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.28/mysql-connector-java-8.0.28.jar
sudo mv mysql-connector-java-8.0.28.jar /opt/dinky/lib/
sudo chmod 777 /opt/dinky/lib/mysql-connector-java-8.0.28.jar
- Dinky 需要具備內置的 Flink 環境,將 EMR Flink 相關環境 jar 包複製到 Dinky 服務器
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/usr/lib/flink/lib/* /opt/dinky/extends/flink1.17/
sudo rm /opt/dinky/extends/flink1.17/flink-table-planner-loader-1.17.1-amzn-1.jar
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/usr/lib/flink/opt/flink-table-planner_2.12-1.17.1-amzn-1.jar /opt/dinky/extends/flink1.17/
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/usr/lib/hadoop/*.jar /opt/dinky/customJar/
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/usr/lib/hadoop/client/*.jar /opt/dinky/customJar/
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/usr/share/aws/emr/emrfs/lib/*.jar /opt/dinky/customJar/
sudo scp -r -i /home/ec2-user/key.pem hadoop@<EMR Master IP>:/usr/lib/flink/plugins/s3/*.jar /opt/dinky/customJar/
- Dinky 任務在提交到 EMR 後,需要在 HDFS 中尋找依賴 jar 包,所以需要將 Dinky 的部分 jar 包上傳到 EMR HDFS。(注意:以下命令中 dinky-app jar 包文件名裡的版本號須與實際安裝的 Dinky 版本一致。)
# 以下命令在 Dinky 服務器執行
sudo scp -r /opt/dinky/jar/dinky-app-1.17-1.0.3-jar-with-dependencies.jar hadoop@<EMR Master IP>:/home/hadoop/dinky/
sudo scp -r /opt/dinky/lib/mysql-connector-java-8.0.28.jar hadoop@<EMR Master IP>:/home/hadoop/dinky/
# 以下命令在 EMR Master 節點執行
hdfs dfs -mkdir /user/hadoop/dinky/
hdfs dfs -mkdir /user/hadoop/flink/
hdfs dfs -mkdir /user/hadoop/flink/lib/
hdfs dfs -put /home/hadoop/dinky/dinky-app-1.17-1.0.3-jar-with-dependencies.jar /user/hadoop/dinky/
hdfs dfs -put /home/hadoop/dinky/mysql-connector-java-8.0.28.jar /user/hadoop/flink/lib/
hdfs dfs -put /usr/lib/flink/lib/* /user/hadoop/flink/lib/
hdfs dfs -put /usr/lib/flink/plugins/s3/* /user/hadoop/flink/lib/
hdfs dfs -rm /user/hadoop/flink/lib/flink-table-planner-loader-1.17.1-amzn-1.jar
hdfs dfs -put /usr/lib/flink/opt/flink-table-planner_2.12-1.17.1-amzn-1.jar /user/hadoop/flink/lib/
- 啟動 Dinky
# 啟動
cd /opt/dinky/
sudo bash auto.sh start
使用默認用戶名/密碼(admin / dinky123!@#),在瀏覽器訪問 <Dinky 服務器 IP>:8888 並登錄,即可進入 Dinky 界面
- 配置dinky flink cluster
在 Dinky 控制臺點擊「註冊中心」,進入後點擊左側「集群 - 集群配置」,進入集群配置列表界面,然後點擊「新建」,創建一個集群。
-- 創建源表 datagen_source
CREATE TABLE datagen_source(
id BIGINT,
name STRING
) WITH (
'connector' = 'datagen'
);
-- 創建結果表 blackhole_sink
CREATE TABLE blackhole_sink(
id BIGINT,
name STRING
) WITH (
'connector' = 'blackhole'
);
-- 將源表數據插入到結果表
INSERT INTO blackhole_sink
SELECT
id ,
name
from datagen_source;
選擇 per-job 模式,提交測試任務
參考: https://aws.amazon.com/cn/blogs/china/building-an-emr-data-analysis-platform-based-on-open-source-tools-part-two/
https://www.dinky.org.cn/docs/1.1/get_started/overview