1 |
45856
|
sandro.lab
|
package eu.dnetlib.data.transform;
|
2 |
|
|
|
3 |
45883
|
claudio.at
|
import java.util.List;
|
4 |
|
|
import java.util.Map;
|
5 |
|
|
import java.util.Map.Entry;
|
6 |
|
|
|
7 |
45856
|
sandro.lab
|
import com.google.common.collect.Lists;
|
8 |
45883
|
claudio.at
|
import com.google.common.collect.Maps;
|
9 |
|
|
import com.googlecode.protobuf.format.JsonFormat;
|
10 |
|
|
import eu.dnetlib.data.graph.model.DNGFDecoder;
|
11 |
|
|
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
|
12 |
|
|
import eu.dnetlib.data.mapreduce.util.dao.HBaseTableDAO;
|
13 |
|
|
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
|
14 |
|
|
import eu.dnetlib.data.proto.DliFieldTypeProtos;
|
15 |
|
|
import eu.dnetlib.data.proto.DliProtos;
|
16 |
|
|
import eu.dnetlib.data.proto.TypeProtos.Type;
|
17 |
46233
|
sandro.lab
|
import eu.dnetlib.dli.proto.DNGFDLISummaryConverter;
|
18 |
45883
|
claudio.at
|
import org.apache.commons.lang3.StringUtils;
|
19 |
|
|
import org.apache.commons.logging.Log;
|
20 |
|
|
import org.apache.commons.logging.LogFactory;
|
21 |
45856
|
sandro.lab
|
import org.junit.Before;
|
22 |
|
|
import org.junit.Test;
|
23 |
|
|
|
24 |
45883
|
claudio.at
|
import static eu.dnetlib.data.graph.utils.RelDescriptor.QUALIFIER_SEPARATOR;
|
25 |
45856
|
sandro.lab
|
|
26 |
|
|
/**
|
27 |
|
|
* Created by sandro on 2/13/17.
|
28 |
|
|
*/
|
29 |
|
|
public class DLIConvertertTest extends AbstractTransformerTest {
|
30 |
|
|
|
31 |
45883
|
claudio.at
|
private static final Log log = LogFactory.getLog(DLIConvertertTest.class);
|
32 |
45856
|
sandro.lab
|
|
33 |
45883
|
claudio.at
|
private Ontologies ontologies;
|
34 |
|
|
|
35 |
45856
|
sandro.lab
|
@Before
|
36 |
|
|
public void setUp() throws Exception {
|
37 |
|
|
factory = new XsltRowTransformerFactory();
|
38 |
45883
|
claudio.at
|
ontologies = OntologyLoader.loadOntologiesFromCp();
|
39 |
45856
|
sandro.lab
|
}
|
40 |
|
|
|
41 |
|
|
|
42 |
|
|
@Test
|
43 |
|
|
public void testLinkPangaeaDLI() throws Exception {
|
44 |
|
|
|
45 |
|
|
final List<Row> rows = Lists.newArrayList();
|
46 |
|
|
rows.addAll(asRows(loadFromTransformationProfile("dmfdli2hbase.xml"), load("record_dli_dmf.xml")));
|
47 |
|
|
rows.addAll(asRows(loadFromTransformationProfile("dmfdli2hbase.xml"), load("record_dli_dmf2.xml")));
|
48 |
|
|
rows.addAll(asRows(loadFromTransformationProfile("pmfdli2hbase.xml"), load("record_dli_pmf.xml")));
|
49 |
45883
|
claudio.at
|
printAll(mapAll(buildTable(rows)));
|
50 |
45856
|
sandro.lab
|
}
|
51 |
|
|
|
52 |
46233
|
sandro.lab
|
private void printAll(final Map<String, DNGFDLISummaryConverter> converters) {
|
53 |
|
|
converters.forEach((id, converter) -> log.info(JsonFormat.printToString(converter.convert())));
|
54 |
|
|
}
|
55 |
45883
|
claudio.at
|
|
56 |
46233
|
sandro.lab
|
private Map<String, DNGFDLISummaryConverter> mapAll(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
|
57 |
|
|
final Map<String, DNGFDLISummaryConverter> builders = Maps.newHashMap();
|
58 |
|
|
for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
|
59 |
|
|
map(builders, e.getKey(), e.getValue());
|
60 |
45883
|
claudio.at
|
}
|
61 |
|
|
return builders;
|
62 |
|
|
}
|
63 |
|
|
|
64 |
46233
|
sandro.lab
|
private void map(final Map<String, DNGFDLISummaryConverter> converters, final String rowKey, final Map<String, Map<String, byte[]>> row) throws Exception {
|
65 |
45883
|
claudio.at
|
|
66 |
|
|
final Type type = DNGFRowKeyDecoder.decode(rowKey).getType();
|
67 |
|
|
|
68 |
|
|
final Map<String, byte[]> familyMap = row.get("metadata");
|
69 |
|
|
|
70 |
|
|
if (familyMap == null) return;
|
71 |
|
|
|
72 |
|
|
final byte[] bodyB = familyMap.get(type.toString());
|
73 |
|
|
|
74 |
|
|
if (bodyB != null) {
|
75 |
|
|
ensureBuilder(converters, rowKey);
|
76 |
|
|
final DNGF mainEntity = DNGFDecoder.decode(
|
77 |
|
|
familyMap.get(type.toString()),
|
78 |
|
|
DliFieldTypeProtos.completionStatus, DliProtos.completionStatus, DliProtos.resolvedfrom, DliProtos.typedIdentifier).getDNGF();
|
79 |
|
|
|
80 |
|
|
converters.get(rowKey).setMainEntity(mainEntity);
|
81 |
|
|
|
82 |
|
|
final Map<String, byte[]> rels = row.get("rels");
|
83 |
|
|
|
84 |
|
|
rels.forEach((q, v) -> {
|
85 |
|
|
|
86 |
|
|
final DNGF.Builder rel = DNGF.newBuilder(HBaseTableDAO.parseProto(v));
|
87 |
|
|
rel.getRelBuilder().setCachedTarget(mainEntity.getEntity());
|
88 |
|
|
|
89 |
|
|
final String source = rel.getRel().getSource();
|
90 |
|
|
final Type sourceType = rel.getRel().getSourceType();
|
91 |
|
|
|
92 |
|
|
rel.getRelBuilder().setSource(rel.getRel().getTarget());
|
93 |
|
|
rel.getRelBuilder().setSourceType(rel.getRel().getTargetType());
|
94 |
|
|
|
95 |
|
|
rel.getRelBuilder().setTarget(source);
|
96 |
|
|
rel.getRelBuilder().setTargetType(sourceType);
|
97 |
|
|
|
98 |
|
|
final Ontology o = ontologies.get(rel.getRel().getRelType().getSchemeid());
|
99 |
|
|
final String inverse = o.inverseOf(rel.getRel().getRelType().getClassid());
|
100 |
|
|
rel.getRelBuilder().getRelTypeBuilder().setClassid(inverse).setClassname(inverse);
|
101 |
|
|
|
102 |
|
|
final String targetId = StringUtils.substringAfter(q, QUALIFIER_SEPARATOR);
|
103 |
|
|
|
104 |
|
|
ensureBuilder(converters, targetId);
|
105 |
46039
|
sandro.lab
|
converters.get(targetId).addRelation(rel.build());
|
106 |
45883
|
claudio.at
|
});
|
107 |
|
|
}
|
108 |
|
|
|
109 |
|
|
}
|
110 |
|
|
|
111 |
46233
|
sandro.lab
|
private void ensureBuilder(final Map<String, DNGFDLISummaryConverter> builders, final String rowKey) {
|
112 |
45883
|
claudio.at
|
if (!builders.containsKey(rowKey)) {
|
113 |
46233
|
sandro.lab
|
builders.put(rowKey, new DNGFDLISummaryConverter());
|
114 |
45883
|
claudio.at
|
}
|
115 |
|
|
}
|
116 |
|
|
|
117 |
45856
|
sandro.lab
|
}
|