HDFS的Java客户端基本操作

时间:2022-02-18 22:14:08

一:需要的jar包:

hadoop-2.4.1\share\hadoop\hdfs\hadoop-hdfs-2.4.1.jar
hadoop-2.4.1\share\hadoop\hdfs\lib\所有jar包

hadoop-2.4.1\share\hadoop\common\hadoop-common-2.4.1.jar
hadoop-2.4.1\share\hadoop\common\lib\所有jar包



二:使用Java客户端连接HDFS

HDFS环境配置可以参考这篇博客:HDFS环境配置

/**
 * Minimal example of connecting to HDFS from Java and uploading one local file.
 */
public class HdfsUtil {
    /**
     * Connects to the NameNode as user {@code root} and copies a local archive
     * into the HDFS root directory.
     *
     * @param args unused
     * @throws IOException          if the connection or the copy fails
     * @throws InterruptedException if obtaining the client is interrupted
     * @throws URISyntaxException   if the NameNode URI is malformed
     */
    public static void main(String[] args) throws IOException, InterruptedException, URISyntaxException {
        // Holds the client-side configuration parameters.
        Configuration conf = new Configuration();
        // try-with-resources guarantees the HDFS client is closed even when the upload throws.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://192.168.77.70:9000"), conf, "root")) {
            // Use the client to operate on files, e.g. upload a local file.
            fs.copyFromLocalFile(new Path("C:/jdk-7u65-linux-i586.tar.gz"), new Path("/"));
        }
    }
}



三:Java客户端基本操作:

/**
 * JUnit examples of basic HDFS operations performed through the Java
 * {@code FileSystem} client: upload, download, directory handling, listing,
 * block locations, rename and replication.
 *
 * <p>{@link #init()} creates a client before each test; every test closes it in a
 * {@code finally} block so the client is released even when an operation throws.
 */
public class HdfsUtil {
    FileSystem fs = null;

    /**
     * Creates the HDFS client used by the tests, connecting as user {@code root}.
     *
     * @throws IOException          if the client cannot be created
     * @throws InterruptedException if obtaining the client is interrupted
     * @throws URISyntaxException   if the NameNode URI is malformed
     */
    @Before
    public void init() throws IOException, InterruptedException, URISyntaxException {
        // Holds the client-side configuration parameters.
        Configuration conf = new Configuration();
        fs = FileSystem.get(new URI("hdfs://192.168.77.70:9000"), conf, "root");
    }

    /**
     * Uploads a local file into the HDFS root directory.
     */
    @Test
    public void testUpload() throws IllegalArgumentException, IOException {
        try {
            fs.copyFromLocalFile(new Path("C:/jdk-7u65-linux-i586.tar.gz"), new Path("/"));
        } finally {
            fs.close();
        }
    }

    /**
     * Downloads a file from HDFS to the local disk.
     */
    @Test
    public void testDownload() throws IllegalArgumentException, IOException {
        try {
            // Arguments: keep the source (delSrc=false); the trailing true selects the
            // raw local file system (see the Hadoop FileSystem#copyToLocalFile docs).
            fs.copyToLocalFile(false, new Path("/jdk-7u65-linux-i586.tar.gz"), new Path("C:/"), true);
        } finally {
            fs.close();
        }
    }

    /**
     * Directory operations: create, check existence, upload into, delete recursively.
     */
    @Test
    public void testDir() throws IllegalArgumentException, IOException {
        Path dir = new Path("/aaa");
        try {
            fs.mkdirs(dir);
            System.out.println("创建了一个文件夹:/aaa");

            boolean exists = fs.exists(dir);
            System.out.println("/aaa文件夹存在否?"+exists);

            fs.copyFromLocalFile(new Path("C:/input.txt"), dir);
            System.out.println("成功上传了一个文件到/aaa目录下");

            // true = recursive, so the uploaded file is removed together with the directory.
            fs.delete(dir, true);
            System.out.println("已经将/aaa目录删除");

            boolean exists2 = fs.exists(dir);
            System.out.println("/aaa文件夹存在否?"+exists2);
        } finally {
            fs.close();
        }
    }

    /**
     * Prints file information in two ways: a recursive file listing, and a
     * listing of the root directory's direct children with a type marker.
     */
    @Test
    public void testFileStatus() throws FileNotFoundException, IllegalArgumentException, IOException {
        try {
            // listFiles reports files only; true makes the listing recursive.
            RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
            while (listFiles.hasNext()) {
                LocatedFileStatus fileStatus = listFiles.next();
                System.out.println(fileStatus.getPath().getName());
            }

            System.out.println("-----------------------");
            // listStatus reports both files and directories.
            FileStatus[] listStatus = fs.listStatus(new Path("/"));
            for (FileStatus f : listStatus) {
                String type = f.isDirectory() ? "d" : "-";
                System.out.println(type+"\t"+f.getPath().getName());
            }
        } finally {
            fs.close();
        }
    }

    /**
     * Miscellaneous operations: prints block offsets/hosts of a file, renames it,
     * then changes its replication factor.
     */
    @Test
    public void testOthers() throws IllegalArgumentException, IOException {
        Path original = new Path("/jdk-7u65-linux-i586.tar.gz");
        Path renamed = new Path("/jdk-7u65-linux-i586.tgz");
        try {
            // Ask HDFS for the real file length instead of hard-coding a byte count,
            // so the whole file is covered whatever its size.
            long length = fs.getFileStatus(original).getLen();
            BlockLocation[] fileBlockLocations = fs.getFileBlockLocations(original, 0, length);
            for (BlockLocation location : fileBlockLocations) {
                System.out.println(location.getOffset());
                // Guard against a block that currently reports no hosting nodes.
                String[] names = location.getNames();
                if (names.length > 0) {
                    System.out.println(names[0]);
                }
            }

            // Rename the file.
            fs.rename(original, renamed);

            // Change the replication factor of the renamed file.
            fs.setReplication(renamed, (short)2);
        } finally {
            fs.close();
        }
    }

}


四:Java客户端IO流操作:

package hdfsUtil;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Before;
import org.junit.Test;


/**
 * JUnit examples of stream-based HDFS I/O: download, upload, and reading
 * from a given offset.
 *
 * <p>Streams are managed with try-with-resources and the client is closed in a
 * {@code finally} block, so nothing leaks when opening a stream, seeking, or
 * copying throws.
 */
public class HdfsIO {
    FileSystem fs = null;

    /**
     * Creates the HDFS client used by the tests, connecting as user {@code root}.
     *
     * @throws IOException          if the client cannot be created
     * @throws InterruptedException if obtaining the client is interrupted
     * @throws URISyntaxException   if the NameNode URI is malformed
     */
    @Before
    public void init() throws IOException, InterruptedException, URISyntaxException {
        // Holds the client-side configuration parameters.
        Configuration conf = new Configuration();
        fs = FileSystem.get(new URI("hdfs://192.168.77.70:9000"), conf, "root");
    }

    /**
     * Downloads a file by copying an HDFS input stream to a local output stream.
     */
    @Test
    public void testDownload() throws IllegalArgumentException, IOException {
        try (FSDataInputStream in = fs.open(new Path("/jdk-7u65-linux-i586.tgz"));
             FileOutputStream out = new FileOutputStream("C:/jdk.tgz")) {
            // false: try-with-resources owns the streams, copyBytes must not close them.
            IOUtils.copyBytes(in, out, fs.getConf(), false);
        } finally {
            fs.close();
        }
    }

    /**
     * Uploads a file by copying a local input stream to an HDFS output stream.
     */
    @Test
    public void testUpload() throws IllegalArgumentException, IOException {
        try (FileInputStream in = new FileInputStream("c:/jdk.tgz");
             FSDataOutputStream out = fs.create(new Path("/jdk.tar.gz"))) {
            // false: try-with-resources owns the streams, copyBytes must not close them.
            IOUtils.copyBytes(in, out, fs.getConf(), false);
        } finally {
            fs.close();
        }
    }

    /**
     * Reads an HDFS file starting at a given byte offset and writes the remainder
     * to a local file. Offset-based reads let distributed jobs hand different
     * slices of one file to different nodes.
     */
    @Test
    public void testSeek() throws IllegalArgumentException, IOException {
        try (FSDataInputStream in = fs.open(new Path("/test.txt"))) {
            // Position the stream at the starting offset before the local file is created.
            in.seek(6);
            try (FileOutputStream out = new FileOutputStream("c:/test.seg.txt")) {
                IOUtils.copyBytes(in, out, fs.getConf(), false);
            }
        } finally {
            fs.close();
        }
    }
}