總結: 之前在教材上看hdfs的Java用戶端編寫,只有關鍵代碼,呵呵……。閑話不說,上正文。
1. Hadoop 的Java用戶端編寫建議在linux系統上開發
2. 可以使用eclipse,idea 等IDE工具,目前比較流行的是idea
3. 建立項目之後需要添加很多jar包,win,linux下添加jar方式略有不同
4. 使用代碼會出現檔案格式不認識,權限等問題。
具體:
1.首先測試從hdfs中下載檔案:
下載檔案的代碼:(將hdfs://localhost:9000/jdk-7u65-linux-i586.tar.gz檔案下載到本地/opt/download/doload.tgz)
package cn.qlq.hdfs;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class HdfsUtil {

    /**
     * Downloads hdfs://localhost:9000/jdk-7u65-linux-i586.tar.gz to the local
     * file /opt/download/doload.tgz.
     *
     * <p>Note: a bare {@code new Configuration()} only picks up *-site.xml files
     * found on the classpath; without them {@code FileSystem.get(conf)} returns
     * the local file system and opening an hdfs:// path fails with "Wrong FS"
     * (exactly the stack trace shown below in this article).
     *
     * @param a command-line arguments (unused)
     * @throws IOException if the HDFS read or the local write fails
     */
    public static void main(String a[]) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("hdfs://localhost:9000/jdk-7u65-linux-i586.tar.gz");
        // try-with-resources closes both streams even when the copy throws,
        // fixing the stream leak in the original version.
        try (FSDataInputStream input = fs.open(path);
                FileOutputStream output = new FileOutputStream("/opt/download/doload.tgz")) {
            IOUtils.copy(input, output);
        }
    }
}
直接運作報錯:
原因是程式不認識 hdfs://localhost:9000/jdk-7u65-linux-i586.tar.gz 這樣的目錄
log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
Exception in thread "main" java.lang.IllegalArgumentException: Wrong FS: hdfs://localhost:9000/jdk-7u65-linux-i586.tar.gz, expected: file:///
at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:643)
at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:79)
at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:506)
at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:724)
at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:501)
at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:397)
at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.(ChecksumFileSystem.java:137)
at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:339)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:764)
at cn.qlq.hdfs.HdfsUtil.main(HdfsUtil.java:21)
解決辦法:
第一種: 将hadoop安裝目錄下的etc目錄中的core-site.xml拷貝到eclipse的src目錄下。這樣就不會報錯
運作結果:
[[email protected] download]# ll
total 140224
-rw-r--r--. 1 root root 143588167 Apr 20 05:55 doload.tgz
[[email protected] download]# pwd
/opt/download
[[email protected] download]# ll
total 140224
-rw-r--r--. 1 root root 143588167 Apr 20 05:55 doload.tgz
第二種:直接在程式中修改
我們先檢視core-site.xml中的内容:
fs.defaultFS
hdfs://localhost:9000
hadoop.tmp.dir
/opt/hadoop/hadoop-2.4.1/data/
代碼改為:
/**
 * Downloads the jdk archive from HDFS to /opt/download/doload.tgz.
 *
 * <p>Setting {@code fs.defaultFS} in code replaces the need to copy
 * core-site.xml onto the classpath (the first workaround described above).
 *
 * @param a command-line arguments (unused)
 * @throws IOException if the HDFS read or the local write fails
 */
public static void main(String a[]) throws IOException {
    Configuration conf = new Configuration();
    // Point the client at the HDFS NameNode so hdfs:// paths are recognized;
    // this in-code value overrides anything read from classpath config files.
    conf.set("fs.defaultFS", "hdfs://localhost:9000");
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("hdfs://localhost:9000/jdk-7u65-linux-i586.tar.gz");
    // try-with-resources closes both streams even when the copy throws,
    // fixing the stream leak in the original version.
    try (FSDataInputStream input = fs.open(path);
            FileOutputStream output = new FileOutputStream("/opt/download/doload.tgz")) {
        IOUtils.copy(input, output);
    }
}
2.下面代碼示範了hdfs的基本操作:
package cn.qlq.hdfs;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Before;
import org.junit.Test;
public class HdfsUtil {

    private FileSystem fs = null;

    /**
     * Builds the HDFS client before each test.
     *
     * <p>{@code new Configuration()} reads any *-site.xml on the classpath; the
     * explicit {@code fs.defaultFS} set below overrides the file-based value.
     * Passing the URI plus the user name "root" to {@code FileSystem.get}
     * avoids the permission problems mentioned at the top of this article.
     */
    @Before
    public void befor() throws IOException, InterruptedException, URISyntaxException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000/");
        fs = FileSystem.get(new URI("hdfs://localhost:9000/"), conf, "root");
    }

    /** Uploads a local file by streaming it into {@code fs.create(...)}. */
    @Test
    public void upload() throws Exception {
        Path dst = new Path("hdfs://localhost:9000/aa/qingshu.txt");
        // try-with-resources closes both streams (leaked in the original).
        try (FSDataOutputStream os = fs.create(dst);
                FileInputStream is = new FileInputStream("/opt/download/haha.txt")) {
            IOUtils.copy(is, os);
        }
    }

    /** Uploads via the convenience API {@code copyFromLocalFile}. */
    @Test
    public void upload2() throws Exception {
        fs.copyFromLocalFile(new Path("/opt/download/haha.txt"),
                new Path("hdfs://localhost:9000/aa/qingshu2.txt"));
    }

    /** Downloads by streaming {@code fs.open(...)} into a local file. */
    @Test
    public void download() throws IOException {
        Path path = new Path("hdfs://localhost:9000/jdk-7u65-linux-i586.tar.gz");
        try (FSDataInputStream input = fs.open(path);
                FileOutputStream output = new FileOutputStream("/opt/download/doload.tgz")) {
            IOUtils.copy(input, output);
        }
    }

    /** Downloads via the convenience API {@code copyToLocalFile}. */
    @Test
    public void download2() throws Exception {
        fs.copyToLocalFile(new Path("hdfs://localhost:9000/aa/qingshu2.txt"),
                new Path("/opt/download/haha2.txt"));
    }

    /**
     * Lists files recursively with {@code listFiles}, then one level of files
     * and directories with {@code listStatus}.
     */
    @Test
    public void listFiles() throws FileNotFoundException, IllegalArgumentException, IOException {
        // listFiles returns file entries only and supports recursion.
        // The typed iterator fixes the raw-type use in the original: with a raw
        // RemoteIterator, next() returns Object and the assignment to
        // LocatedFileStatus does not compile.
        RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path("/"), true);
        while (files.hasNext()) {
            LocatedFileStatus file = files.next();
            System.out.println(file.getPath().getName());
        }
        System.out.println("---------------------------------");
        // listStatus returns both files and directories but does not recurse.
        FileStatus[] listStatus = fs.listStatus(new Path("/"));
        for (FileStatus status : listStatus) {
            String name = status.getPath().getName();
            System.out.println(name + (status.isDirectory() ? " is dir" : " is file"));
        }
    }

    /** Creates nested directories ({@code mkdir -p} semantics). */
    @Test
    public void mkdir() throws IllegalArgumentException, Exception {
        fs.mkdirs(new Path("/aaa/bbb/ccc"));
    }

    /** Recursively deletes /aa and everything under it. */
    @Test
    public void rm() throws IllegalArgumentException, IOException {
        fs.delete(new Path("/aa"), true);
    }
}