Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.log4j.Logger;
import org.junit.Test;
18
/**
19
 * Simple class to dump a HDFS path to a local folder Used for backups
20
 * 
21
 * @author eri
22
 * 
23
 */
24
public class HdfsExport {
25
	private Logger log = Logger.getLogger(this.getClass());
26

    
27
	@SuppressWarnings("static-access")
28
	 @Test
29
	public void readFile() throws IOException {
30
		Configuration conf = this.dnetConfig();
31
//		/export_db_openaireplus_node6_t_2014-05-12
32
		Path pt = new Path("hdfs://quorum1.t.hadoop.research-infrastructures.eu/tmp/test_stats");
33
//		dnet.clustername				=	DM
34
//
35
//				#CORE-SITE
36
//				fs.defaultFS					=	hdfs://nmis-hadoop-cluster
37
//
38
//				hadoop.security.authentication	=	simple
39
//				hadoop.security.auth_to_local	=	DEFAULT
40
//
41
//				hadoop.rpc.socket.factory.class.default	=	org.apache.hadoop.net.StandardSocketFactory
42

    
43
		FileSystem fileSystem = FileSystem.get(conf);
44

    
45
		try {
46

    
47
			if (fileSystem.exists(pt)) {
48
				log.info("Dir " + pt + "   exists");
49

    
50
				RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(pt, false);
51

    
52
				while (files.hasNext()) {
53

    
54
					Path partPath = files.next().getPath();
55
					String name = partPath.getName();
56

    
57
					log.info(name);
58

    
59
//					if (name.contains("part")) {
60
//						String[] split = name.split("-");
61
//
62
////						if (Integer.parseInt(split[2]) >= Integer.parseInt("00074")) 
63
//						{
64
//							log.info("backing up///");
65
//
66
//							FSDataInputStream in = fileSystem.open(partPath);
67
//
68
//							OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(name)));
69
//
70
//							byte[] b = new byte[1024];
71
//							int numBytes = 0;
72
//							while ((numBytes = in.read(b)) > 0) {
73
//								out.write(b, 0, numBytes);
74
//							}
75
//							out.close();
76
//							in.close();
77
//						}
78
//					}
79
				} 
80

    
81
			}
82

    
83
		} catch (Exception e) {
84
			e.printStackTrace();
85
			log.error(e);
86

    
87
			fileSystem.close();
88

    
89
		} finally {
90

    
91
			fileSystem.close();
92
		}
93

    
94
	}
95

    
96
	private Configuration dnetConfig() {// #HBASE-SITE
97
		Configuration conf = new Configuration();
98
		// ZOOKEEPER
99
		conf.set("oozie.service.loc", "http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie");
100

    
101
		conf.set("zookeeper.znode.rootserver", "root-region-server");
102
		conf.set("dfs.nameservices", "nmis-hadoop-cluster");
103
		conf.set("dfs.ha.namenodes.nmis-hadoop-cluster","nn1,nn2"
104
				+ "				dfs.namenode.rpc-address.nmis-hadoop-cluster.nn1=quorum1.t.hadoop.research-infrastructures.eu:8020"
105
				+ "dfs.namenode.http-address.nmis-hadoop-cluster.nn1=quorum1.t.hadoop.research-infrastructures.eu:50070"
106
				+ ""
107
				+ "	dfs.namenode.rpc-address.nmis-hadoop-cluster.nn2=quorum2.t.hadoop.research-infrastructures.eu:8020"
108
				+ "	dfs.namenode.http-address.nmis-hadoop-cluster.nn2=quorum2.t.hadoop.research-infrastructures.eu:50070"
109
				+ "dfs.client.failover.proxy.provider.nmis-hadoop-cluster=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"); 		 
110
		conf.set("dnet.clustername", "DM");
111

    
112
		conf.set("hbase.zookeeper.property.clientPort", "2182");
113
		conf.set("fs.defaultFS", "hdfs://quorum1.t.hadoop.research-infrastructures.eu");
114

    
115
		// #CORE-SITE
116
//		conf.set("fs.defaultFS", "hdfs://quorum1.t.hadoop.research-infrastructures.eu");
117

    
118
		conf.set("hadoop.security.authentication", "simple");
119
		conf.set("hadoop.security.auth_to_local", "DEFAULT");
120
		conf.set("hadoop.rpc.socket.factory.class.default", "org.apache.hadoop.net.StandardSocketFactory");
121
//		quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu
122
		return conf;
123
	}
124
}
(4-4/6)