package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.log4j.Logger;
import org.junit.Test;

/**
 * Simple class to dump an HDFS path to a local folder. Used for backups.
 *
 * @author eri
 */
public class HdfsExport {

    private Logger log = Logger.getLogger(this.getClass());
    @SuppressWarnings("static-access")
    @Test
    public void readFile() throws IOException {
        Configuration conf = this.dnetConfig();
        // /export_db_openaireplus_node6_t_2014-05-12
        Path pt = new Path("hdfs://quorum1.t.hadoop.research-infrastructures.eu/tmp/test_stats");
        // dnet.clustername = DM
        //
        // #CORE-SITE
        // fs.defaultFS = hdfs://nmis-hadoop-cluster
        //
        // hadoop.security.authentication = simple
        // hadoop.security.auth_to_local = DEFAULT
        //
        // hadoop.rpc.socket.factory.class.default = org.apache.hadoop.net.StandardSocketFactory

        FileSystem fileSystem = FileSystem.get(conf);

        try {
            if (fileSystem.exists(pt)) {
                log.info("Dir " + pt + " exists");

                RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(pt, false);

                while (files.hasNext()) {
                    Path partPath = files.next().getPath();
                    String name = partPath.getName();

                    log.info(name);
                    // Back up each part file of the dump into the local working directory
                    if (name.contains("part")) {
                        log.info("backing up " + name);

                        FSDataInputStream in = fileSystem.open(partPath);
                        OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(name)));

                        byte[] b = new byte[1024];
                        int numBytes = 0;
                        while ((numBytes = in.read(b)) > 0) {
                            out.write(b, 0, numBytes);
                        }
                        out.close();
                        in.close();
                    }
                }
            }
        } catch (Exception e) {
            log.error("Error while reading from HDFS path " + pt, e);
        } finally {
            fileSystem.close();
        }
    }
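    /**
     * Illustrative alternative (not part of the original class): Hadoop's
     * FileSystem.copyToLocalFile can replace the manual stream copy used in
     * readFile(). The helper name and the choice of the current working
     * directory as the target are assumptions made only for this sketch.
     */
    private void copyPartToLocal(FileSystem fileSystem, Path partPath) throws IOException {
        // Copies the remote part file into the local working directory,
        // letting Hadoop handle buffering and stream cleanup.
        fileSystem.copyToLocalFile(partPath, new Path(partPath.getName()));
    }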
    private Configuration dnetConfig() {
        // #HBASE-SITE
        Configuration conf = new Configuration();

        // ZOOKEEPER
        conf.set("oozie.service.loc", "http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie");
        conf.set("zookeeper.znode.rootserver", "root-region-server");

        // HDFS HA settings: each property must be set individually
        conf.set("dfs.nameservices", "nmis-hadoop-cluster");
        conf.set("dfs.ha.namenodes.nmis-hadoop-cluster", "nn1,nn2");
        conf.set("dfs.namenode.rpc-address.nmis-hadoop-cluster.nn1", "quorum1.t.hadoop.research-infrastructures.eu:8020");
        conf.set("dfs.namenode.http-address.nmis-hadoop-cluster.nn1", "quorum1.t.hadoop.research-infrastructures.eu:50070");
        conf.set("dfs.namenode.rpc-address.nmis-hadoop-cluster.nn2", "quorum2.t.hadoop.research-infrastructures.eu:8020");
        conf.set("dfs.namenode.http-address.nmis-hadoop-cluster.nn2", "quorum2.t.hadoop.research-infrastructures.eu:50070");
        conf.set("dfs.client.failover.proxy.provider.nmis-hadoop-cluster",
                "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

        conf.set("dnet.clustername", "DM");

        conf.set("hbase.zookeeper.property.clientPort", "2182");
        conf.set("fs.defaultFS", "hdfs://quorum1.t.hadoop.research-infrastructures.eu");

        // #CORE-SITE
        conf.set("hadoop.security.authentication", "simple");
        conf.set("hadoop.security.auth_to_local", "DEFAULT");
        conf.set("hadoop.rpc.socket.factory.class.default", "org.apache.hadoop.net.StandardSocketFactory");

        // quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu
        return conf;
    }
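    /**
     * Illustrative alternative (not part of the original class): instead of
     * hard-coding the cluster properties, the client-side *-site.xml files can
     * be merged into the Configuration. The file locations below are assumed
     * defaults and may differ on a given machine.
     */
    private Configuration dnetConfigFromSiteFiles() {
        Configuration conf = new Configuration();
        // addResource overlays the properties defined in each XML file
        conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
        conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"));
        return conf;
    }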
}