package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.log4j.Logger;
import org.junit.Test;
/**
|
19 |
|
|
* Simple class to dump a HDFS path to a local folder Used for backups
|
20 |
|
|
*
|
21 |
|
|
* @author eri
|
22 |
|
|
*
|
23 |
|
|
*/
|
24 |
|
|
public class HdfsExport {
|
25 |
|
|
private Logger log = Logger.getLogger(this.getClass());
|
26 |
|
|
|
27 |
|
|
@SuppressWarnings("static-access")
|
28 |
28414
|
eri.katsar
|
@Test
|
29 |
27955
|
claudio.at
|
public void readFile() throws IOException {
|
30 |
|
|
Configuration conf = this.dnetConfig();
|
31 |
28368
|
eri.katsar
|
// /export_db_openaireplus_node6_t_2014-05-12
|
32 |
28627
|
eri.katsar
|
Path pt = new Path("hdfs://quorum1.t.hadoop.research-infrastructures.eu/tmp/test_stats");
|
33 |
28414
|
eri.katsar
|
// dnet.clustername = DM
|
34 |
|
|
//
|
35 |
|
|
// #CORE-SITE
|
36 |
|
|
// fs.defaultFS = hdfs://nmis-hadoop-cluster
|
37 |
|
|
//
|
38 |
|
|
// hadoop.security.authentication = simple
|
39 |
|
|
// hadoop.security.auth_to_local = DEFAULT
|
40 |
|
|
//
|
41 |
|
|
// hadoop.rpc.socket.factory.class.default = org.apache.hadoop.net.StandardSocketFactory
|
42 |
27955
|
claudio.at
|
|
43 |
|
|
FileSystem fileSystem = FileSystem.get(conf);
|
44 |
|
|
|
45 |
|
|
try {
|
46 |
|
|
|
47 |
|
|
if (fileSystem.exists(pt)) {
|
48 |
|
|
log.info("Dir " + pt + " exists");
|
49 |
|
|
|
50 |
|
|
RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(pt, false);
|
51 |
|
|
|
52 |
|
|
while (files.hasNext()) {
|
53 |
|
|
|
54 |
|
|
Path partPath = files.next().getPath();
|
55 |
|
|
String name = partPath.getName();
|
56 |
|
|
|
57 |
|
|
log.info(name);
|
58 |
|
|
|
59 |
28627
|
eri.katsar
|
// if (name.contains("part")) {
|
60 |
|
|
// String[] split = name.split("-");
|
61 |
|
|
//
|
62 |
|
|
//// if (Integer.parseInt(split[2]) >= Integer.parseInt("00074"))
|
63 |
|
|
// {
|
64 |
|
|
// log.info("backing up///");
|
65 |
|
|
//
|
66 |
|
|
// FSDataInputStream in = fileSystem.open(partPath);
|
67 |
|
|
//
|
68 |
|
|
// OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(name)));
|
69 |
|
|
//
|
70 |
|
|
// byte[] b = new byte[1024];
|
71 |
|
|
// int numBytes = 0;
|
72 |
|
|
// while ((numBytes = in.read(b)) > 0) {
|
73 |
|
|
// out.write(b, 0, numBytes);
|
74 |
|
|
// }
|
75 |
|
|
// out.close();
|
76 |
|
|
// in.close();
|
77 |
|
|
// }
|
78 |
|
|
// }
|
79 |
|
|
}
|
80 |
27955
|
claudio.at
|
|
81 |
|
|
}
|
82 |
|
|
|
83 |
|
|
} catch (Exception e) {
|
84 |
|
|
e.printStackTrace();
|
85 |
|
|
log.error(e);
|
86 |
|
|
|
87 |
|
|
fileSystem.close();
|
88 |
|
|
|
89 |
|
|
} finally {
|
90 |
|
|
|
91 |
|
|
fileSystem.close();
|
92 |
|
|
}
|
93 |
|
|
|
94 |
|
|
}
|
95 |
|
|
|
96 |
|
|
private Configuration dnetConfig() {// #HBASE-SITE
|
97 |
|
|
Configuration conf = new Configuration();
|
98 |
|
|
// ZOOKEEPER
|
99 |
|
|
conf.set("oozie.service.loc", "http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie");
|
100 |
|
|
|
101 |
|
|
conf.set("zookeeper.znode.rootserver", "root-region-server");
|
102 |
28585
|
eri.katsar
|
conf.set("dfs.nameservices", "nmis-hadoop-cluster");
|
103 |
|
|
conf.set("dfs.ha.namenodes.nmis-hadoop-cluster","nn1,nn2"
|
104 |
|
|
+ " dfs.namenode.rpc-address.nmis-hadoop-cluster.nn1=quorum1.t.hadoop.research-infrastructures.eu:8020"
|
105 |
|
|
+ "dfs.namenode.http-address.nmis-hadoop-cluster.nn1=quorum1.t.hadoop.research-infrastructures.eu:50070"
|
106 |
|
|
+ ""
|
107 |
|
|
+ " dfs.namenode.rpc-address.nmis-hadoop-cluster.nn2=quorum2.t.hadoop.research-infrastructures.eu:8020"
|
108 |
|
|
+ " dfs.namenode.http-address.nmis-hadoop-cluster.nn2=quorum2.t.hadoop.research-infrastructures.eu:50070"
|
109 |
28590
|
eri.katsar
|
+ "dfs.client.failover.proxy.provider.nmis-hadoop-cluster=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
|
110 |
27955
|
claudio.at
|
conf.set("dnet.clustername", "DM");
|
111 |
|
|
|
112 |
|
|
conf.set("hbase.zookeeper.property.clientPort", "2182");
|
113 |
28627
|
eri.katsar
|
conf.set("fs.defaultFS", "hdfs://quorum1.t.hadoop.research-infrastructures.eu");
|
114 |
27955
|
claudio.at
|
|
115 |
|
|
// #CORE-SITE
|
116 |
28579
|
eri.katsar
|
// conf.set("fs.defaultFS", "hdfs://quorum1.t.hadoop.research-infrastructures.eu");
|
117 |
27955
|
claudio.at
|
|
118 |
|
|
conf.set("hadoop.security.authentication", "simple");
|
119 |
|
|
conf.set("hadoop.security.auth_to_local", "DEFAULT");
|
120 |
|
|
conf.set("hadoop.rpc.socket.factory.class.default", "org.apache.hadoop.net.StandardSocketFactory");
|
121 |
28585
|
eri.katsar
|
// quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu
|
122 |
27955
|
claudio.at
|
return conf;
|
123 |
|
|
}
|
124 |
|
|
}
|