調試環境
我使用的調試環境是:
- idea
- sqoop 1.99.6
調試使用的代碼
/**
* Created by zj on 16-6-24.
*/
import org.apache.sqoop.client.SqoopClient;
import org.apache.sqoop.model.*;
import org.apache.sqoop.validation.*; //for status class
import org.apache.sqoop.submission.counter.*;
import java.util.Collections;
import java.util.*;
public class DT {
//a helper function . so that user has to iterate the list of validation messages.
// printMessage(link.getConnectorLinkConfig().getConfigs());
private static void printMessage(List<MConfig> configs) {
for (MConfig config : configs) {
List<MInput<?>> inputlist = config.getInputs();
if (config.getValidationMessages() != null) {
// print every validation message
for (Message message : config.getValidationMessages()) {
System.out.println("Config validation message: " + message.getMessage());
}
}
for (MInput minput : inputlist) {
if (minput.getValidationStatus() == Status.WARNING) {
for (Message message : config.getValidationMessages()) {
System.out.println("Config Input Validation Warning: " + message.getMessage());
}
} else if (minput.getValidationStatus() == Status.ERROR) {
for (Message message : config.getValidationMessages()) {
System.out.println("Config Input Validation Error: " + message.getMessage());
}
}
}
}
}
public static void main(String [] args){
//Initialization
String url = "http://localhost:12000/sqoop/";
SqoopClient client = new SqoopClient(url);
Collection<MConnector> connectorCollections=client.getConnectors();
for(MConnector mc: connectorCollections) {
System.out.println(mc.getUniqueName());
System.out.println(mc.getClassName());
System.out.println(mc.getPersistenceId());
}
/* I have create the link before, and the link object is persisted in the database of derby
// create a placeholder for link
long connectorId = 1;
MLink link = client.createLink(connectorId);
link.setName("mysql_link");
link.setCreationUser("zjlearn");
MLinkConfig linkConfig = link.getConnectorLinkConfig();
// fill in the link config values
linkConfig.getStringInput("linkConfig.connectionString").setValue("jdbc:mysql://localhost/my");
linkConfig.getStringInput("linkConfig.jdbcDriver").setValue("com.mysql.jdbc.Driver");
linkConfig.getStringInput("linkConfig.username").setValue("root");
linkConfig.getStringInput("linkConfig.password").setValue("root");
// save the link object that was filled
Status status = client.saveLink(link);
if(status.canProceed()) {
System.out.println("Created Link with Link Id : " + link.getPersistenceId());
} else {
System.out.println("Something went wrong creating the link");
}
//create the link2
long connectorId2 = 2;
MLink link2 = client.createLink(connectorId2);
link2.setName("hdfs_link");
link2.setCreationUser("zjlearn");
MLinkConfig linkConfig2 = link.getConnectorLinkConfig();
// fill in the link config values , for hdfs connector
// save the link object that was filled
Status status2 = client.saveLink(link);
if(status2.canProceed()) {
System.out.println("Created Link with Link Id : " + link2.getPersistenceId());
} else {
System.out.println("Something went wrong creating the link");
}
*/
//create the job
//Creating dummy job object
long fromLinkId = 1;// link for jdbc connector
long toLinkId = 2; // link for HDFS connector
MJob job = client.createJob(fromLinkId, toLinkId);
job.setName("mysql_hafs_dt");
job.setCreationUser("zjlearn");
// set the "FROM" link job config values
MFromConfig fromJobConfig = job.getFromJobConfig();
fromJobConfig.getStringInput("fromJobConfig.schemaName").setValue("sqoop");
fromJobConfig.getStringInput("fromJobConfig.tableName").setValue("sqoop");
fromJobConfig.getStringInput("fromJobConfig.partitionColumn").setValue("id");
// set the "TO" link job config values
MToConfig toJobConfig = job.getToJobConfig();
toJobConfig.getStringInput("toJobConfig.outputDirectory").setValue("/usr/tmp");
// set the driver config values
MDriverConfig driverConfig = job.getDriverConfig();
driverConfig.getStringInput("throttlingConfig.numExtractors").setValue("3");
Status status3 = client.saveJob(job);
if(status3.canProceed()) {
System.out.println("Created Job with Job Id: "+ job.getPersistenceId());
} else {
System.out.println("Something went wrong creating the job");
}
//start the job
long jobId= job.getPersistenceId();
MSubmission Jobsubmission = client.startJob(jobId);
System.out.println("Job Submission Status : " + Jobsubmission.getStatus());
if(Jobsubmission.getStatus().isRunning() && Jobsubmission.getProgress() != -1) {
System.out.println("Progress : " + String.format("%.2f %%", Jobsubmission.getProgress() * 100));
}
System.out.println("Hadoop job id :" + Jobsubmission.getExternalJobId()); //the method has change getExternalJobId
System.out.println("Job link : " + Jobsubmission.getExternalLink());
Counters counters = Jobsubmission.getCounters();
if(counters != null) {
System.out.println("Counters:");
for(CounterGroup group : counters) {
System.out.print("\t");
System.out.println(group.getName());
for(Counter counter : group) {
System.out.print("\t\t");
System.out.print(counter.getName());
System.out.print(": ");
System.out.println(counter.getValue());
}
}
}
//there is no method getExceptionInfo in the Jobsubmission in the sqoop2 but a getErroe method
if(Jobsubmission.getError() != null) {
System.out.println("Exception info : " +Jobsubmission.getError());
}
//Check job status for a running job
MSubmission jobStatusSubmission = client.getJobStatus(jobId);
if(jobStatusSubmission.getStatus().isRunning() && jobStatusSubmission.getProgress() != -1) {
System.out.println("Progress : " + String.format("%.2f %%", jobStatusSubmission.getProgress() * 100));
}
//Stop a running job. there is an error in the web page
client.stopJob(jobId);
//get the execute
}
}
Maven依賴
下面是項目使用到的依賴:
<!-- Sqoop client artifact, pinned to the same 1.99.6 version as the Sqoop installation being debugged -->
<dependency>
<groupId>org.apache.sqoop</groupId>
<artifactId>sqoop-client</artifactId>
<version>1.99.6</version>
</dependency>
<!-- Sqoop server artifact at the same version.
     NOTE(review): presumably included so server-side classes can be stepped through while debugging; confirm it is actually required. -->
<dependency>
<groupId>org.apache.sqoop</groupId>
<artifactId>sqoop-server</artifactId>
<version>1.99.6</version>
</dependency>
代碼調試方法
這部分主要參考:
- http://blog.csdn.net/xichenguan/article/details/39228301
- http://qifuguang.me/2015/09/18/IntelliJ%E8%BF%9C%E7%A8%8B%E8%B0%83%E8%AF%95%E6%95%99%E7%A8%8B/
- http://linux.it.net.cn/e/server/Tomcat/2015/0202/12850.html
Sqoop2是一個C/S架構,客戶端包括sqoop-shell和sqoop-client,服務器端包括sqoop-server;sqoop-server就是一個部署在Tomcat下的Web應用,由幾個servlet組成。
調試sqoop-shell
調試sqoop-shell就是在啟動sqoop client的main方法時,加上調試參數。
sqoop-shell的啟動方式為:sqoop.sh client。以此作為解決問題的起點,查看sqoop.sh腳本,在其中找到這樣一段:
client)
# Case arm for the "client" sub-command: starts the Sqoop shell in a new JVM.
# Build class path with full path to each library
for f in $CLIENT_LIB/*.jar; do
CLASSPATH="${CLASSPATH}:$f"
done
# Use the java found on PATH unless JAVA_HOME is set, in which case use its bin/java
EXEC_JAVA='java'
if [ -n "${JAVA_HOME}" ] ; then
EXEC_JAVA="${JAVA_HOME}/bin/java"
fi
# Launch SqoopShell; $2 (the argument after "client") is passed through to it.
# JVM options such as remote-debug flags have to be placed before the main class name.
${EXEC_JAVA} -classpath ${CLASSPATH} org.apache.sqoop.shell.SqoopShell $2
;;
這一部分的代碼是運行sqoop.sh client 命令時將會執行的代碼。可以看到:
${EXEC_JAVA} -classpath ${CLASSPATH} org.apache.sqoop.shell.SqoopShell $2,
這行shell腳本就是啟動sqoop客戶端的main方法的地方了,在其中加入:
-Xdebug -Xnoagent -Xrunjdwp:transport=dt_socket,address=8199,server=y,suspend=n
這一行代碼成了下面這樣:
${EXEC_JAVA} -classpath ${CLASSPATH} -Xdebug -Xnoagent -Xrunjdwp:transport=dt_socket,address=8199,server=y,suspend=n org.apache.sqoop.shell.SqoopShell $2
**注意:放到一行里,不要換行。參數中的server不是指IP。**
參數說明:
-Xdebug : 啟用調試模式
-Xrunjdwp:<sub-options> : 加載JVM的JPDA參考實現庫
transport=dt_socket : 通過Socket連接;也可選dt_shmem,通過共享內存的方式連接到調試服務器
address=8199 : 調試服務器監聽的端口(上面的例子中為8199)
server=y : 是否作為服務器端,n為客戶端
suspend=n : 啟動時是否暫停,y為啟動時暫停,方便調試啟動過程
參數的具體意義可以查看相關文檔。IBM上有個系列文章專門講Java的調試體系,叫做《深入 Java 調試體系》,講得很細很好,相信對于很多人來說,會填補一塊Java知識的空白。
這樣就設置好了,當運行sqoop.sh client 啟動sqoop shell的時候,會看到輸出中包含下面的內容:
Listening for transport dt_socket at address: 8199
在IDE(比如Eclipse)里,選擇遠程調試,在Eclipse中是Remote Java Application。主要是填程序所在機器的網絡地址和端口號,在這個例子中,端口號就是8199。
Eclipse這端開始Debug之后,設置好斷點,然后在Sqoop所在機器上啟動的sqoop shell中進行操作,運行到斷點就會停住,在Eclipse這端跟調試本地程序一樣。
調試sqoop-server
因為sqoop-server就是一個Java Web應用,所以設置sqoop-server遠程調試,就是設置Tomcat為遠程調試。運行Tomcat的catalina.sh命令可以看到,Tomcat已經為我們提供了jpda選項(Sqoop2相關的catalina.sh文件在SQOOP_HOME/server/bin文件夾下面,運行它可以得到如下的信息):
Usage: catalina.sh ( commands ... )
commands:
debug Start Catalina in a debugger
debug -security Debug Catalina with a security manager
jpda start Start Catalina under JPDA debugger
run Start Catalina in the current window
run -security Start in the current window with security manager
start Start Catalina in a separate window
start -security Start in a separate window with security manager
stop Stop Catalina, waiting up to 5 seconds for the process to end
stop n Stop Catalina, waiting up to n seconds for the process to end
stop -force Stop Catalina, wait up to 5 seconds and then use kill -KILL if still running
stop n -force Stop Catalina, wait up to n seconds and then use kill -KILL if still running
version What version of tomcat are you running?
Note: Waiting for the process to end and use of the -force option require that $CATALINA_PID is defined
由于這個內置的Tomcat的啟動是由sqoop.sh腳本控制的,啟動命令如下:
sqoop.sh server start
于是去查看sqoop.sh腳本的代碼,找到如下部分:
server)
# Case arm for the "server" sub-command: forwards start/stop to Tomcat's catalina.sh.
# Only "server" was given (no action): print usage and leave
if [ $# = 1 ]; then
echo "Usage: sqoop.sh server <start/stop>"
exit
fi
# Second argument is the action handed to catalina.sh (start or stop, per the usage text)
actionCmd=$2
source ${BASEDIR}/bin/sqoop-sys.sh
setup_catalina_opts
# There seems to be a bug in catalina.sh whereby catalina.sh doesn't respect
# CATALINA_OPTS when stopping the tomcat server. Consequently, we have to hack around
# by specifying the CATALINA_OPTS properties in JAVA_OPTS variable
if [ "$actionCmd" == "stop" ]; then
export JAVA_OPTS="$JAVA_OPTS $CATALINA_OPTS"
fi
# Remove the first 2 command line arguments (server and action command (start/stop)) so we can pass
# the rest to catalina.sh script
shift
shift
# Run catalina.sh with the action followed by any remaining arguments
$CATALINA_BIN/catalina.sh $actionCmd "$@"
;;
從sqoop.sh server start命令來看,actionCmd就是start了,也就是說,sqoop.sh server start命令最終執行catalina.sh命令時傳入的是start,即catalina.sh start。我們想運行:
catalina.sh jpda start
于是把原來
$CATALINA_BIN/catalina.sh $actionCmd "$@"
這行腳本中直接加入jpda,最終的腳本為:
$CATALINA_BIN/catalina.sh jpda $actionCmd "$@"
上邊我們設置sqoop shell 的jpda的時候,是在JVM啟動時加入參數;但是從catalina.sh打印出的幫助信息來看,沒有傳入參數的地方,于是想到可能是在配置文件或者腳本中通過環境變量來設置。
最后在catalina.sh中找到下面的配置:
# Handle a leading "jpda" argument: build the JDWP agent options and add them to CATALINA_OPTS.
# Each JPDA_* variable is only given a default when it is empty or unset, so values
# already present in the environment take precedence.
if [ "$1" = "jpda" ] ; then
if [ -z "$JPDA_TRANSPORT" ]; then
JPDA_TRANSPORT="dt_socket"
fi
if [ -z "$JPDA_ADDRESS" ]; then
JPDA_ADDRESS="8000"
fi
if [ -z "$JPDA_SUSPEND" ]; then
JPDA_SUSPEND="n"
fi
# A pre-set JPDA_OPTS is used as-is; otherwise it is assembled from the three values above
if [ -z "$JPDA_OPTS" ]; then
JPDA_OPTS="-agentlib:jdwp=transport=$JPDA_TRANSPORT,address=$JPDA_ADDRESS,server=y,suspend=$JPDA_SUSPEND"
fi
CATALINA_OPTS="$CATALINA_OPTS $JPDA_OPTS"
# Drop "jpda" so the following argument becomes $1
shift
fi
這是默認的配置,也可以通過設置JPDA_TRANSPORT、JPDA_ADDRESS、JPDA_SUSPEND或JPDA_OPTS環境變量來自行更改。
完成上邊的配置后,在Eclipse這端,調試sqoop-server項目:選擇Remote Java Application,填上遠程JVM的地址和端口號(這個例子中是8000),點擊debug就可以了。