1
|
package eu.dnetlib.data.transform;
|
2
|
|
3
|
import java.io.IOException;
|
4
|
import java.util.List;
|
5
|
import java.util.concurrent.atomic.AtomicInteger;
|
6
|
|
7
|
import eu.dnetlib.data.graph.model.DNGFDecoder;
|
8
|
import eu.dnetlib.data.proto.WdsDatasetProtos.WdsDataset;
|
9
|
import eu.dnetlib.data.proto.WdsDatasetProtos.WdsDataset.GeoLocation;
|
10
|
import org.apache.commons.io.IOUtils;
|
11
|
import org.apache.commons.lang3.StringUtils;
|
12
|
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
|
13
|
import org.apache.hadoop.conf.Configuration;
|
14
|
import org.apache.hadoop.hbase.client.*;
|
15
|
import org.apache.hadoop.hbase.util.Bytes;
|
16
|
import org.junit.*;
|
17
|
import org.springframework.core.io.ClassPathResource;
|
18
|
import org.springframework.core.io.Resource;
|
19
|
|
20
|
/**
|
21
|
* Created by claudio on 05/09/16.
|
22
|
*/
|
23
|
public class HBaseReadTest {
|
24
|
|
25
|
private static final String TABLE_NAME = "db_wds";
|
26
|
private Resource confIn = new ClassPathResource("eu/dnetlib/data/hadoop/config/hadoop-default.dm.cnr.properties");
|
27
|
|
28
|
private HTable table;
|
29
|
|
30
|
@Before
|
31
|
public void setUp() throws IOException {
|
32
|
|
33
|
final Configuration conf = new Configuration();
|
34
|
|
35
|
for(final String line : IOUtils.readLines(confIn.getInputStream())) {
|
36
|
System.out.println("line = " + line);
|
37
|
if (!line.trim().isEmpty() && !line.startsWith("#")) {
|
38
|
final String[] split = line.split("=");
|
39
|
conf.set(split[0].trim(), split[1].trim());
|
40
|
}
|
41
|
}
|
42
|
|
43
|
table = new HTable(conf, Bytes.toBytes(TABLE_NAME));
|
44
|
}
|
45
|
|
46
|
@Ignore
|
47
|
@Test
|
48
|
public void testReadGeoLocations() throws IOException {
|
49
|
|
50
|
final Scan scan = new Scan();
|
51
|
scan.addColumn(Bytes.toBytes("dataset"), Bytes.toBytes("body"));
|
52
|
|
53
|
final ResultScanner rs = table.getScanner(scan);
|
54
|
|
55
|
System.out.println("start iteration");
|
56
|
|
57
|
final DescriptiveStatistics statN = new DescriptiveStatistics();
|
58
|
final AtomicInteger invalid = new AtomicInteger(0);
|
59
|
rs.forEach(r -> {
|
60
|
final byte[] b = r.getValue(Bytes.toBytes("dataset"), Bytes.toBytes("body"));
|
61
|
final DNGFDecoder d = DNGFDecoder.decode(b, WdsDataset.geolocation);
|
62
|
final List<GeoLocation> geoList = d.getDNGF().getEntity().getDataset().getMetadata().getExtension(WdsDataset.geolocation);
|
63
|
geoList.forEach(g -> g.getBoxList().forEach(box -> {
|
64
|
if (StringUtils.isNotBlank(box)) {
|
65
|
final String[] split = box.trim().split(" ");
|
66
|
try {
|
67
|
statN.addValue(split.length);
|
68
|
Assert.assertTrue("bad number of coordinates", split.length == 4);
|
69
|
|
70
|
// Rect(minX=-180.0,maxX=180.0,minY=-90.0,maxY=90.0)
|
71
|
|
72
|
Assert.assertTrue("minX=-180", Double.parseDouble(split[1]) >= -180.0);
|
73
|
Assert.assertTrue("maxX= 180", Double.parseDouble(split[3]) <= 180.0);
|
74
|
Assert.assertTrue("minY= -90", Double.parseDouble(split[0]) >= -90.0);
|
75
|
Assert.assertTrue("maxY= 90", Double.parseDouble(split[2]) <= 90.0);
|
76
|
|
77
|
//maxY must be >= minY: 90.0 to -90.0
|
78
|
Assert.assertTrue("maxY must be >= minY", Double.parseDouble(split[2]) >= Double.parseDouble(split[0]));
|
79
|
|
80
|
//maxY must be >= minY: 90.0 to -90.0
|
81
|
Assert.assertTrue("maxX must be >= minX", Double.parseDouble(split[3]) >= Double.parseDouble(split[1]));
|
82
|
} catch (AssertionError e) {
|
83
|
invalid.set(invalid.get() + 1);
|
84
|
//System.err.println(String.format("document %s has %s coordinates: %s", d.getDNGF().getEntity().getId(), split.length, e.getMessage()));
|
85
|
//throw e;
|
86
|
}
|
87
|
}
|
88
|
}));
|
89
|
});
|
90
|
|
91
|
rs.close();
|
92
|
|
93
|
System.out.println(String.format("stat N: %s", statN));
|
94
|
System.out.println(String.format("invalid N: %s", invalid.get()));
|
95
|
}
|
96
|
|
97
|
|
98
|
@After
|
99
|
public void tearDown() throws IOException {
|
100
|
table.close();
|
101
|
}
|
102
|
}
|