Project

General

Profile

1
package eu.dnetlib.data.transform;
2

    
3
import java.io.IOException;
4
import java.util.List;
5
import java.util.concurrent.atomic.AtomicInteger;
6

    
7
import eu.dnetlib.data.graph.model.DNGFDecoder;
8
import eu.dnetlib.data.proto.WdsDatasetProtos.WdsDataset;
9
import eu.dnetlib.data.proto.WdsDatasetProtos.WdsDataset.GeoLocation;
10
import org.apache.commons.io.IOUtils;
11
import org.apache.commons.lang3.StringUtils;
12
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
13
import org.apache.hadoop.conf.Configuration;
14
import org.apache.hadoop.hbase.client.*;
15
import org.apache.hadoop.hbase.util.Bytes;
16
import org.junit.*;
17
import org.springframework.core.io.ClassPathResource;
18
import org.springframework.core.io.Resource;
19

    
20
/**
21
 * Created by claudio on 05/09/16.
22
 */
23
public class HBaseReadTest {
24

    
25
	private static final String TABLE_NAME = "db_wds";
26
	private Resource confIn = new ClassPathResource("eu/dnetlib/data/hadoop/config/hadoop-default.dm.cnr.properties");
27

    
28
	private HTable table;
29

    
30
	@Before
31
	public void setUp() throws IOException {
32

    
33
		final Configuration conf = new Configuration();
34

    
35
		for(final String line : IOUtils.readLines(confIn.getInputStream())) {
36
			System.out.println("line = " + line);
37
			if (!line.trim().isEmpty() && !line.startsWith("#")) {
38
				final String[] split = line.split("=");
39
				conf.set(split[0].trim(), split[1].trim());
40
			}
41
		}
42

    
43
		table = new HTable(conf, Bytes.toBytes(TABLE_NAME));
44
	}
45

    
46
	@Ignore
47
	@Test
48
	public void testReadGeoLocations() throws IOException {
49

    
50
		final Scan scan = new Scan();
51
		scan.addColumn(Bytes.toBytes("dataset"), Bytes.toBytes("body"));
52

    
53
		final ResultScanner rs = table.getScanner(scan);
54

    
55
		System.out.println("start iteration");
56

    
57
		final DescriptiveStatistics statN = new DescriptiveStatistics();
58
		final AtomicInteger invalid = new AtomicInteger(0);
59
		rs.forEach(r -> {
60
			final byte[] b = r.getValue(Bytes.toBytes("dataset"), Bytes.toBytes("body"));
61
			final DNGFDecoder d = DNGFDecoder.decode(b, WdsDataset.geolocation);
62
			final List<GeoLocation> geoList = d.getDNGF().getEntity().getDataset().getMetadata().getExtension(WdsDataset.geolocation);
63
			geoList.forEach(g -> g.getBoxList().forEach(box -> {
64
				if (StringUtils.isNotBlank(box)) {
65
					final String[] split = box.trim().split(" ");
66
					try {
67
						statN.addValue(split.length);
68
						Assert.assertTrue("bad number of coordinates", split.length == 4);
69

    
70
						// Rect(minX=-180.0,maxX=180.0,minY=-90.0,maxY=90.0)
71

    
72
						Assert.assertTrue("minX=-180", Double.parseDouble(split[1]) >= -180.0);
73
						Assert.assertTrue("maxX= 180", Double.parseDouble(split[3]) <=  180.0);
74
						Assert.assertTrue("minY= -90", Double.parseDouble(split[0]) >= -90.0);
75
						Assert.assertTrue("maxY=  90", Double.parseDouble(split[2]) <=  90.0);
76

    
77
						//maxY must be >= minY: 90.0 to -90.0
78
						Assert.assertTrue("maxY must be >= minY", Double.parseDouble(split[2]) >= Double.parseDouble(split[0]));
79

    
80
						//maxY must be >= minY: 90.0 to -90.0
81
						Assert.assertTrue("maxX must be >= minX", Double.parseDouble(split[3]) >= Double.parseDouble(split[1]));
82
					} catch (AssertionError e) {
83
						invalid.set(invalid.get() + 1);
84
						//System.err.println(String.format("document %s has %s coordinates: %s", d.getDNGF().getEntity().getId(), split.length, e.getMessage()));
85
						//throw e;
86
					}
87
				}
88
			}));
89
		});
90

    
91
		rs.close();
92

    
93
		System.out.println(String.format("stat N: %s", statN));
94
		System.out.println(String.format("invalid N: %s", invalid.get()));
95
	}
96

    
97

    
98
	@After
99
	public void tearDown() throws IOException {
100
		table.close();
101
	}
102
}
(1-1/2)