[本文為草稿,代碼中很多地方都還沒有仔細研究,等明天會補上更多的注釋]
這節課的關鍵就在於課程中提供的3段代碼,要會用,會讀,更要會改。
hadoop fs -[command] 執行HDFS命令
FileSystemCat.java 代碼分析:
//這段程序用於讀取hdfs上的text文件,並把文件中的內容輸出到終端上
import java.io.InputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
/**
 * Prints the contents of the file named by a URI to standard output.
 *
 * <p>Usage: {@code FileSystemCat <uri>}
 */
public class FileSystemCat {

    /** Buffer size, in bytes, passed to the copy routine. */
    private static final int COPY_BUFFER_SIZE = 4096;

    /**
     * Opens the file given as the first argument and copies it to {@code System.out}.
     *
     * @param args {@code args[0]} is the URI (path) of the file to print
     * @throws Exception if the file system cannot be obtained or the file cannot be read
     */
    public static void main(String[] args) throws Exception {
        final String location = args[0];
        final Configuration settings = new Configuration();
        // Obtain the FileSystem that serves this URI, using the loaded configuration.
        final FileSystem fileSystem = FileSystem.get(URI.create(location), settings);

        InputStream contents = null;
        try {
            contents = fileSystem.open(new Path(location));
            // The final 'false' asks copyBytes not to close the streams itself:
            // System.out must stay open, and the input is closed in the finally block.
            IOUtils.copyBytes(contents, System.out, COPY_BUFFER_SIZE, false);
        } finally {
            IOUtils.closeStream(contents);
        }
    }
}
LocalFile2Hdfs 代碼分析:
//這段代碼用於讀入一個本地的TXT文件並將其第101-120字節的內容寫入HDFS成為一個新文件
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
/**
 * Copies bytes 101-120 of a local file into a newly created file at the given target URI.
 *
 * <p>Usage: {@code LocalFile2Hdfs <localPath> <targetUri>}
 *
 * <p>If the source file has fewer than 120 bytes, only the bytes that exist past the first
 * 100 are written; the target file is still created (possibly empty).
 */
public class LocalFile2Hdfs {

    /** Number of leading bytes of the source file that are not copied. */
    private static final int SKIP_BYTES = 100;

    /** Maximum number of bytes copied after the skipped prefix. */
    private static final int COPY_BYTES = 20;

    /**
     * @param args {@code args[0]} is the local source path, {@code args[1]} is the target URI
     * @throws Exception if the source cannot be read or the target cannot be created, written
     *     or closed
     */
    public static void main(String[] args) throws Exception {
        // Source and destination locations.
        String local = args[0];
        String uri = args[1];
        FileInputStream in = null;
        OutputStream out = null;
        Configuration conf = new Configuration();
        try {
            // Open the local source file.
            in = new FileInputStream(new File(local));
            // Create the destination file; a '*' is printed on every progress callback.
            FileSystem fs = FileSystem.get(URI.create(uri), conf);
            out = fs.create(new Path(uri), new Progressable() {
                @Override
                public void progress() {
                    System.out.println("*");
                }
            });
            // Skip the first 100 bytes. skip() may skip fewer bytes than requested,
            // so the helper keeps going until the count is reached or the file ends.
            skipUpTo(in, SKIP_BYTES);
            // Read up to 20 bytes starting at byte 101. A single read() may return
            // fewer bytes than are available, so the helper loops until full or EOF.
            byte[] buffer = new byte[COPY_BYTES];
            int bytesRead = readUpTo(in, buffer);
            if (bytesRead > 0) {
                out.write(buffer, 0, bytesRead);
            }
            // Close the output explicitly so that a failed flush/close is reported;
            // IOUtils.closeStream in the finally block suppresses such errors.
            out.close();
        } finally {
            IOUtils.closeStream(in);
            IOUtils.closeStream(out);
        }
    }

    /** Skips up to {@code count} bytes, stopping early only at end of stream; returns the number skipped. */
    private static long skipUpTo(FileInputStream in, long count) throws java.io.IOException {
        long skipped = 0;
        while (skipped < count) {
            long step = in.skip(count - skipped);
            if (step > 0) {
                skipped += step;
            } else if (in.read() >= 0) {
                // skip() made no progress; consuming one byte distinguishes "not yet" from EOF.
                skipped++;
            } else {
                break;
            }
        }
        return skipped;
    }

    /** Fills {@code buffer} as far as the stream allows; returns the number of bytes read (0 at EOF). */
    private static int readUpTo(FileInputStream in, byte[] buffer) throws java.io.IOException {
        int total = 0;
        while (total < buffer.length) {
            int n = in.read(buffer, total, buffer.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total;
    }
}
Hdfs2LocalFile 代碼分析:
//這段代碼用於讀入一個在hdfs上的txt文件,並將其第101-120字節的內容寫入本地文件系統成為一個新文件
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
/**
 * Copies bytes 101-120 of the file at the given source URI into a new local file.
 *
 * <p>Usage: {@code Hdfs2LocalFile <sourceUri> <localPath>}
 *
 * <p>If the source file has fewer than 120 bytes, only the bytes that exist past the first
 * 100 are written; the local file is still created (possibly empty).
 */
public class Hdfs2LocalFile {

    /** Number of leading bytes of the source file that are not copied. */
    private static final int SKIP_BYTES = 100;

    /** Maximum number of bytes copied after the skipped prefix. */
    private static final int COPY_BYTES = 20;

    /**
     * @param args {@code args[0]} is the source URI, {@code args[1]} is the local target path
     * @throws Exception if the source cannot be opened or read, or the local file cannot be
     *     created, written or closed
     */
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        String local = args[1];
        FSDataInputStream in = null;
        OutputStream out = null;
        Configuration conf = new Configuration();
        try {
            FileSystem fs = FileSystem.get(URI.create(uri), conf);
            in = fs.open(new Path(uri));
            out = new FileOutputStream(local);
            // Skip the first 100 bytes. skip() may skip fewer bytes than requested,
            // so the helper keeps going until the count is reached or the file ends.
            skipUpTo(in, SKIP_BYTES);
            // Read up to 20 bytes starting at byte 101. A single read() may return
            // fewer bytes than are available, so the helper loops until full or EOF.
            byte[] buffer = new byte[COPY_BYTES];
            int bytesRead = readUpTo(in, buffer);
            if (bytesRead > 0) {
                out.write(buffer, 0, bytesRead);
            }
            // Close the output explicitly so that a failed flush/close is reported;
            // IOUtils.closeStream in the finally block suppresses such errors.
            out.close();
        } finally {
            IOUtils.closeStream(in);
            IOUtils.closeStream(out);
        }
    }

    /** Skips up to {@code count} bytes, stopping early only at end of stream; returns the number skipped. */
    private static long skipUpTo(FSDataInputStream in, long count) throws java.io.IOException {
        long skipped = 0;
        while (skipped < count) {
            long step = in.skip(count - skipped);
            if (step > 0) {
                skipped += step;
            } else if (in.read() >= 0) {
                // skip() made no progress; consuming one byte distinguishes "not yet" from EOF.
                skipped++;
            } else {
                break;
            }
        }
        return skipped;
    }

    /** Fills {@code buffer} as far as the stream allows; returns the number of bytes read (0 at EOF). */
    private static int readUpTo(FSDataInputStream in, byte[] buffer) throws java.io.IOException {
        int total = 0;
        while (total < buffer.length) {
            int n = in.read(buffer, total, buffer.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total;
    }
}