package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.log4j.Logger;
import org.junit.Test;

/**
 * Simple class to dump an HDFS path to a local folder. Used for backups.
 *
 * @author eri
 */
public class HdfsExport {

	private Logger log = Logger.getLogger(this.getClass());

	@SuppressWarnings("static-access")
	@Test
	public void readFile() throws IOException {
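		// Lists the files found under the hard-coded HDFS path and logs their names;
		// the actual copy-to-local backup logic is currently commented out below.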
		Configuration conf = this.dnetConfig();

//		/export_db_openaireplus_node6_t_2014-05-12
		Path pt = new Path("hdfs://quorum1.t.hadoop.research-infrastructures.eu/tmp/test_stats");

		// Cluster settings, for reference:
		//   dnet.clustername                        = DM
		//   #CORE-SITE
		//   fs.defaultFS                            = hdfs://nmis-hadoop-cluster
		//   hadoop.security.authentication          = simple
		//   hadoop.security.auth_to_local           = DEFAULT
		//   hadoop.rpc.socket.factory.class.default = org.apache.hadoop.net.StandardSocketFactory

		FileSystem fileSystem = FileSystem.get(conf);

		try {

			if (fileSystem.exists(pt)) {
				log.info("Dir " + pt + "   exists");

				RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(pt, false);

				while (files.hasNext()) {

					Path partPath = files.next().getPath();
					String name = partPath.getName();

					log.info(name);

//					if (name.contains("part")) {
//						String[] split = name.split("-");
//
////						if (Integer.parseInt(split[2]) >= Integer.parseInt("00074"))
//						{
//							log.info("backing up///");
//
//							FSDataInputStream in = fileSystem.open(partPath);
//							OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(name)));
//
//							byte[] b = new byte[1024];
//							int numBytes = 0;
//							while ((numBytes = in.read(b)) > 0) {
//								out.write(b, 0, numBytes);
//							}
//							out.close();
//							in.close();
//						}
//					}
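//					A possible simpler alternative (a sketch, not used here): Hadoop's built-in
//					FileSystem.copyToLocalFile could replace the manual stream copy above, e.g.:
//					if (name.contains("part")) {
//						fileSystem.copyToLocalFile(partPath, new Path(name));
//					}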
				}

			}

		} catch (Exception e) {
			log.error("Error while reading " + pt + " from HDFS", e);
		} finally {
			// close the FileSystem only once, in the finally block
			fileSystem.close();
		}
	}

	private Configuration dnetConfig() { // #HBASE-SITE
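		// Builds a Hadoop Configuration with the ZooKeeper, HDFS HA and core-site settings
		// of the dnet cluster, mirroring the values noted in the comments of readFile().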
		Configuration conf = new Configuration();

		// ZOOKEEPER
		conf.set("oozie.service.loc", "http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie");
		conf.set("zookeeper.znode.rootserver", "root-region-server");

		// #HDFS-SITE (HA namenode settings)
		conf.set("dfs.nameservices", "nmis-hadoop-cluster");
		conf.set("dfs.ha.namenodes.nmis-hadoop-cluster", "nn1,nn2");
		conf.set("dfs.namenode.rpc-address.nmis-hadoop-cluster.nn1", "quorum1.t.hadoop.research-infrastructures.eu:8020");
		conf.set("dfs.namenode.http-address.nmis-hadoop-cluster.nn1", "quorum1.t.hadoop.research-infrastructures.eu:50070");
		conf.set("dfs.namenode.rpc-address.nmis-hadoop-cluster.nn2", "quorum2.t.hadoop.research-infrastructures.eu:8020");
		conf.set("dfs.namenode.http-address.nmis-hadoop-cluster.nn2", "quorum2.t.hadoop.research-infrastructures.eu:50070");
		conf.set("dfs.client.failover.proxy.provider.nmis-hadoop-cluster",
				"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

		conf.set("dnet.clustername", "DM");

		conf.set("hbase.zookeeper.property.clientPort", "2182");
		conf.set("fs.defaultFS", "hdfs://quorum1.t.hadoop.research-infrastructures.eu");

		// #CORE-SITE
		conf.set("hadoop.security.authentication", "simple");
		conf.set("hadoop.security.auth_to_local", "DEFAULT");
		conf.set("hadoop.rpc.socket.factory.class.default", "org.apache.hadoop.net.StandardSocketFactory");

//		quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu

		return conf;
	}
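
	// Hypothetical entry point (a sketch): runs the dump outside of JUnit, reusing the
	// same hard-coded path and configuration as readFile().
	public static void main(String[] args) throws IOException {
		new HdfsExport().readFile();
	}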
}