天天看點

HDFS 對檔案的增刪改查

源代碼:

pom.xml:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Build descriptor for the HDFS Java API demo: Hadoop 3.1.4 client libraries plus JUnit 4. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cn.idcast</groupId>
    <artifactId>hdfs_api_demo</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>3.1.4</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <!-- Pinned to a concrete version: the RELEASE meta-version resolves to
                 whatever is newest at build time and makes builds non-reproducible. -->
            <version>4.13.2</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <!-- Java compiler plugin: compile as Java 8 with UTF-8 sources -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>

            <!-- Shade plugin: builds a single jar with dependencies during the package phase -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <!-- NOTE(review): minimizeJar drops classes that are not statically
                                 referenced; confirm that Hadoop classes loaded reflectively
                                 survive in the shaded jar. -->
                            <minimizeJar>true</minimizeJar>
                        </configuration>
                    </execution>

                </executions>
            </plugin>
        </plugins>
    </build>
</project>

java:

package cn.idcast.hdfs_api;

import com.jcraft.jsch.IO;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.kerby.util.IOUtil;
import org.apache.log4j.BasicConfigurator;
import org.junit.Test;

import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * JUnit-driven walkthrough of the HDFS Java client API: obtaining a
 * {@link FileSystem}, listing files, creating directories and files, and
 * downloading, uploading and merging files.
 *
 * <p>Every example opens its resources in a try-with-resources statement so
 * that file systems and streams are released even when an operation fails,
 * and so that errors raised while closing (for example a failed final flush
 * of an HDFS output stream) are reported instead of being silently dropped.
 */
public class HdfsApiDemo {
    /** URI of the HDFS NameNode used by every example. */
    private static final String HDFS_URI = "hdfs://node1:8020";

    /** HDFS user the file-operation examples connect as. */
    private static final String HDFS_USER = "root";

    /**
     * Opens a {@link FileSystem} for {@link #HDFS_URI} acting as {@link #HDFS_USER}.
     * The caller is responsible for closing the returned instance.
     */
    private static FileSystem connect() throws IOException, URISyntaxException, InterruptedException {
        return FileSystem.get(new URI(HDFS_URI), new Configuration(), HDFS_USER);
    }

    /**
     * Way 1 of obtaining a FileSystem: set {@code fs.defaultFS} on a
     * {@link Configuration} and call {@link FileSystem#get(Configuration)}.
     *
     * @throws IOException if the file system cannot be created or closed
     */
    @Test
    public void getFileSystem1() throws IOException {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", HDFS_URI);
        try (FileSystem fileSystem = FileSystem.get(configuration)) {
            System.out.println(fileSystem.toString());
        }
    }

    /**
     * Way 2 of obtaining a FileSystem: pass the cluster URI directly to
     * {@link FileSystem#get(URI, Configuration)}.
     *
     * @throws IOException        if the file system cannot be created or closed
     * @throws URISyntaxException if the cluster URI is malformed
     */
    @Test
    public void getFileSystem2() throws IOException, URISyntaxException {
        try (FileSystem fileSystem = FileSystem.get(new URI(HDFS_URI), new Configuration())) {
            System.out.println(fileSystem);
        }
    }

    /**
     * Way 3 of obtaining a FileSystem: set {@code fs.defaultFS} on a
     * {@link Configuration} and call {@link FileSystem#newInstance(Configuration)}.
     *
     * @throws IOException if the file system cannot be created or closed
     */
    @Test
    public void getFileSystem3() throws IOException {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", HDFS_URI);
        try (FileSystem fileSystem = FileSystem.newInstance(configuration)) {
            System.out.println(fileSystem.toString());
        }
    }

    /**
     * Way 4 of obtaining a FileSystem: pass the cluster URI directly to
     * {@link FileSystem#newInstance(URI, Configuration)}.
     *
     * @throws IOException        if the file system cannot be created or closed
     * @throws URISyntaxException if the cluster URI is malformed
     */
    @Test
    public void getFileSystem4() throws IOException, URISyntaxException {
        try (FileSystem fileSystem = FileSystem.newInstance(new URI(HDFS_URI), new Configuration())) {
            System.out.println(fileSystem.toString());
        }
    }

    /**
     * Prints the path of every file found under {@code /}, descending into
     * sub-directories (the {@code recursive} flag of {@code listFiles} is {@code true}).
     *
     * @throws IOException          if listing fails or the file system cannot be closed
     * @throws URISyntaxException   if the cluster URI is malformed
     * @throws InterruptedException if the connection attempt is interrupted
     */
    @Test
    public void listMyFiles() throws IOException, URISyntaxException, InterruptedException {
        try (FileSystem fileSystem = connect()) {
            RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(new Path("/"), true);
            while (files.hasNext()) {
                LocatedFileStatus next = files.next();
                System.out.println(next.getPath().toString());
            }
        }
    }

    /**
     * Creates the directory {@code /hello/mydir/test} and prints the boolean
     * returned by {@code mkdirs}.
     *
     * @throws IOException          if the directory cannot be created
     * @throws URISyntaxException   if the cluster URI is malformed
     * @throws InterruptedException if the connection attempt is interrupted
     */
    @Test
    public void mkdirs() throws IOException, URISyntaxException, InterruptedException {
        try (FileSystem fileSystem = connect()) {
            boolean mkdirs = fileSystem.mkdirs(new Path("/hello/mydir/test"));
            System.out.println(mkdirs);
        }
    }

    /**
     * Creates the empty file {@code /hello/mydir/test/a.txt}. Despite the
     * method name this creates a file, not a directory.
     *
     * <p>The stream returned by {@code create} is closed immediately; leaving
     * it open would leak the stream and the underlying connection.
     *
     * @throws IOException          if the file cannot be created
     * @throws URISyntaxException   if the cluster URI is malformed
     * @throws InterruptedException if the connection attempt is interrupted
     */
    @Test
    public void mkdirsTest() throws IOException, URISyntaxException, InterruptedException {
        try (FileSystem fileSystem = connect();
             FSDataOutputStream created = fileSystem.create(new Path("/hello/mydir/test/a.txt"))) {
            // Nothing to write: opening and closing the stream is what creates the empty file.
        }
    }

    /**
     * Downloads {@code /hello/mydir/test/a.txt} to {@code D://a.txt} by
     * copying between an HDFS input stream and a local output stream.
     *
     * @throws URISyntaxException   if the cluster URI is malformed
     * @throws IOException          if reading, writing or closing fails
     * @throws InterruptedException if the connection attempt is interrupted
     */
    @Test
    public void downloadFile() throws URISyntaxException, IOException, InterruptedException {
        try (FileSystem fileSystem = connect();
             FSDataInputStream inputStream = fileSystem.open(new Path("/hello/mydir/test/a.txt"));
             FileOutputStream outputStream = new FileOutputStream("D://a.txt")) {
            IOUtils.copy(inputStream, outputStream);
        }
    }

    /**
     * Downloads {@code /hello/mydir/test/a.txt} to {@code D://a.txt} using
     * the one-call helper {@code copyToLocalFile}.
     *
     * @throws URISyntaxException   if the cluster URI is malformed
     * @throws IOException          if the copy fails
     * @throws InterruptedException if the connection attempt is interrupted
     */
    @Test
    public void downloadFile2() throws URISyntaxException, IOException, InterruptedException {
        try (FileSystem fileSystem = connect()) {
            fileSystem.copyToLocalFile(new Path("/hello/mydir/test/a.txt"), new Path("D://a.txt"));
        }
    }

    /**
     * Uploads the local file {@code D://hdfs-site.txt} into the HDFS root directory.
     *
     * @throws URISyntaxException   if the cluster URI is malformed
     * @throws IOException          if the copy fails
     * @throws InterruptedException if the connection attempt is interrupted
     */
    @Test
    public void uploadFile() throws URISyntaxException, IOException, InterruptedException {
        try (FileSystem fileSystem = connect()) {
            fileSystem.copyFromLocalFile(new Path("D://hdfs-site.txt"), new Path("/"));
        }
    }

    /**
     * Merges every regular file directly inside the local directory
     * {@code D://input} into the single HDFS file {@code /big_txt.txt}.
     *
     * <p>The local directory is listed before the HDFS target is created, so
     * a missing or unreadable input directory does not leave an empty target
     * file behind. Sub-directories of the input directory are skipped because
     * they cannot be opened as a stream.
     *
     * @throws URISyntaxException   if the cluster URI is malformed
     * @throws IOException          if listing, copying or closing fails
     * @throws InterruptedException if the connection attempt is interrupted
     */
    @Test
    public void mergeFile() throws URISyntaxException, IOException, InterruptedException {
        try (FileSystem fileSystem = connect();
             LocalFileSystem localFileSystem = FileSystem.getLocal(new Configuration())) {
            // 1: collect the small local files first so a bad input directory fails early
            FileStatus[] fileStatuses = localFileSystem.listStatus(new Path("D://input"));

            // 2: open the HDFS output stream that receives the merged content
            try (FSDataOutputStream outputStream = fileSystem.create(new Path("/big_txt.txt"))) {
                // 3: append each small file to the merged file
                for (FileStatus fileStatus : fileStatuses) {
                    if (!fileStatus.isFile()) {
                        continue;
                    }
                    try (FSDataInputStream inputStream = localFileSystem.open(fileStatus.getPath())) {
                        IOUtils.copy(inputStream, outputStream);
                    }
                }
            }
        }
    }
}