1
|
package eu.dnetlib.data.transform;
|
2
|
|
3
|
import com.google.common.collect.Lists;
|
4
|
import com.google.common.collect.Maps;
|
5
|
import com.googlecode.protobuf.format.JsonFormat;
|
6
|
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
|
7
|
import eu.dnetlib.data.mapreduce.util.dao.HBaseTableDAO;
|
8
|
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
|
9
|
import eu.dnetlib.data.proto.TypeProtos.Type;
|
10
|
|
11
|
import eu.dnetlib.data.proto.dli.Scholix2ObjectProtos;
|
12
|
import eu.dnetlib.dli.proto.DNGF2Scholix2Converter;
|
13
|
import eu.dnetlib.dli.proto.DNGFDLISummaryConverter;
|
14
|
import org.apache.commons.lang3.StringUtils;
|
15
|
import org.apache.commons.logging.Log;
|
16
|
import org.apache.commons.logging.LogFactory;
|
17
|
import org.junit.Before;
|
18
|
import org.junit.Test;
|
19
|
|
20
|
import java.util.Iterator;
|
21
|
import java.util.List;
|
22
|
import java.util.Map;
|
23
|
import java.util.Map.Entry;
|
24
|
|
25
|
import static eu.dnetlib.data.graph.utils.RelDescriptor.QUALIFIER_SEPARATOR;
|
26
|
import static eu.dnetlib.data.proto.dli.Scholix2ObjectProtos.*;
|
27
|
|
28
|
/**
|
29
|
* Created by sandro on 2/13/17.
|
30
|
*/
|
31
|
public class DLIConvertertTest extends AbstractTransformerTest {
|
32
|
|
33
|
private static final Log log = LogFactory.getLog(DLIConvertertTest.class);
|
34
|
|
35
|
private Ontologies ontologies;
|
36
|
|
37
|
@Before
|
38
|
public void setUp() throws Exception {
|
39
|
factory = new XsltRowTransformerFactory();
|
40
|
ontologies = OntologyLoader.loadOntologiesFromCp();
|
41
|
}
|
42
|
|
43
|
@Test
|
44
|
public void testLinkPangaeaDLI() throws Exception {
|
45
|
|
46
|
final List<Row> rows = Lists.newArrayList();
|
47
|
rows.addAll(asRows(loadFromTransformationProfile("dmfdli2hbase.xml"), load("record_dli_dmf.xml")));
|
48
|
rows.addAll(asRows(loadFromTransformationProfile("dmfdli2hbase.xml"), load("record_dli_dmf2.xml")));
|
49
|
// rows.addAll(asRows(loadFromTransformationProfile("pmfdli2hbase.xml"), load("record_dli_pmf.xml")));
|
50
|
printAllSummary(mapAllSummary(buildTable(rows)));
|
51
|
}
|
52
|
|
53
|
@Test
|
54
|
public void testLinkPangaeaDLIScholix() throws Exception {
|
55
|
|
56
|
final List<Row> rows = Lists.newArrayList();
|
57
|
rows.addAll(asRows(loadFromTransformationProfile("dmfdli2hbase.xml"), load("record_dli_dmf2.xml")));
|
58
|
rows.addAll(asRows(loadFromTransformationProfile("dmfdli2hbase.xml"), load("record_dli_dmf.xml")));
|
59
|
// rows.forEach(row ->
|
60
|
// row.getColumns().forEach(
|
61
|
// result -> {
|
62
|
// if (result != null) {
|
63
|
//
|
64
|
//
|
65
|
// final DNGFDecoder decoder =
|
66
|
// DNGFDecoder.decode(result.getValue(), DliFieldTypeProtos.completionStatus, DliProtos.completionStatus, DliProtos.resolvedfrom, DliProtos.typedIdentifier);
|
67
|
// System.out.println(row.getKey());
|
68
|
// Put put = HBaseTableDAO.asPutByCollectedFrom(decoder.getDNGF());
|
69
|
// System.out.println("put.getTimeStamp() = " + put.getTimeStamp());
|
70
|
//
|
71
|
// System.out.println("decoder.getDNGF().toString() = " + decoder.getDNGF().toString());
|
72
|
// }
|
73
|
// }
|
74
|
// ));
|
75
|
|
76
|
|
77
|
|
78
|
|
79
|
// //rows.addAll(asRows(loadFromTransformationProfile("pmfdli2hbase.xml"), load("record_dli_pmf.xml")));
|
80
|
reduceScholix(mapAllScholix(buildTable(rows)));
|
81
|
}
|
82
|
|
83
|
private void reduceScholix(final Map<String, List<Scholix>> scholix) {
|
84
|
scholix.forEach((id, list) -> {
|
85
|
final Iterator<Scholix> it = list.iterator();
|
86
|
final Scholix source = it.next();
|
87
|
if (source.hasSource()) {
|
88
|
it.forEachRemaining(s -> log.info(JsonFormat.printToString(
|
89
|
Scholix.newBuilder(s)
|
90
|
.setSource(source.getSource())
|
91
|
.build())));
|
92
|
} else {
|
93
|
log.warn("missing source in scholix: " + id);
|
94
|
}
|
95
|
}
|
96
|
);
|
97
|
}
|
98
|
|
99
|
private void printAllSummary(final Map<String, DNGFDLISummaryConverter> converters) {
|
100
|
converters.forEach((id, converter) -> log.info(JsonFormat.printToString(converter.convert())));
|
101
|
}
|
102
|
|
103
|
private void printAllScholix(final Map<String, List<Scholix>> converters) {
|
104
|
converters.values().forEach(converter -> converter.forEach(s -> log.info(JsonFormat.printToString(s))));
|
105
|
}
|
106
|
|
107
|
private Map<String, DNGFDLISummaryConverter> mapAllSummary(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
|
108
|
final Map<String, DNGFDLISummaryConverter> builders = Maps.newHashMap();
|
109
|
for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
|
110
|
mapSummary(builders, e.getKey(), e.getValue());
|
111
|
}
|
112
|
return builders;
|
113
|
}
|
114
|
|
115
|
private Map<String, List<Scholix>> mapAllScholix(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
|
116
|
final Map<String, List<Scholix>> builders = Maps.newHashMap();
|
117
|
for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
|
118
|
mapScholix(builders, e.getKey(), e.getValue());
|
119
|
}
|
120
|
return builders;
|
121
|
}
|
122
|
|
123
|
private void mapScholix(final Map<String, List<Scholix>> builders, final String rowKey, final Map<String, Map<String, byte[]>> row) {
|
124
|
|
125
|
final Type type = DNGFRowKeyDecoder.decode(rowKey).getType();
|
126
|
final Map<String, byte[]> familyMap = row.get("metadata");
|
127
|
|
128
|
if (familyMap == null) return;
|
129
|
|
130
|
final byte[] bodyB = familyMap.get(type.toString());
|
131
|
|
132
|
if (bodyB != null) {
|
133
|
ensureScholixBuilder(builders, rowKey);
|
134
|
final DNGF entity = HBaseTableDAO.parseProto(familyMap.get(type.toString()));
|
135
|
|
136
|
final Scholix.Builder source = DNGF2Scholix2Converter.withSource(entity.getEntity());
|
137
|
builders.get(rowKey).add(source.build());
|
138
|
|
139
|
final Map<String, byte[]> rels = row.get("rels");
|
140
|
rels.forEach((q, v) -> {
|
141
|
final DNGF.Builder r = DNGF.newBuilder(HBaseTableDAO.parseProto(v));
|
142
|
final Scholix.Builder target = DNGF2Scholix2Converter.withTarget(entity.getEntity(), r.getRel());
|
143
|
ensureScholixBuilder(builders, r.getRel().getTarget());
|
144
|
builders.get(r.getRel().getTarget()).add(target.build());
|
145
|
});
|
146
|
}
|
147
|
}
|
148
|
|
149
|
private void mapSummary(final Map<String, DNGFDLISummaryConverter> converters, final String rowKey, final Map<String, Map<String, byte[]>> row) throws Exception {
|
150
|
|
151
|
final Type type = DNGFRowKeyDecoder.decode(rowKey).getType();
|
152
|
|
153
|
final Map<String, byte[]> familyMap = row.get("metadata");
|
154
|
|
155
|
if (familyMap == null) return;
|
156
|
|
157
|
final byte[] bodyB = familyMap.get(type.toString());
|
158
|
|
159
|
if (bodyB != null) {
|
160
|
ensureSummaryBuilder(converters, rowKey);
|
161
|
|
162
|
final DNGF mainEntity = HBaseTableDAO.parseProto(familyMap.get(type.toString()));
|
163
|
converters.get(rowKey).setMainEntity(mainEntity);
|
164
|
|
165
|
final Map<String, byte[]> rels = row.get("rels");
|
166
|
|
167
|
rels.forEach((q, v) -> {
|
168
|
|
169
|
final DNGF.Builder rel = DNGF.newBuilder(HBaseTableDAO.parseProto(v));
|
170
|
rel.getRelBuilder().setCachedTarget(mainEntity.getEntity());
|
171
|
|
172
|
final String source = rel.getRel().getSource();
|
173
|
final Type sourceType = rel.getRel().getSourceType();
|
174
|
|
175
|
rel.getRelBuilder().setSource(rel.getRel().getTarget());
|
176
|
rel.getRelBuilder().setSourceType(rel.getRel().getTargetType());
|
177
|
|
178
|
rel.getRelBuilder().setTarget(source);
|
179
|
rel.getRelBuilder().setTargetType(sourceType);
|
180
|
|
181
|
final Ontology o = ontologies.get(rel.getRel().getRelType().getSchemeid());
|
182
|
final String inverse = o.inverseOf(rel.getRel().getRelType().getClassid());
|
183
|
rel.getRelBuilder().getRelTypeBuilder().setClassid(inverse).setClassname(inverse);
|
184
|
|
185
|
final String targetId = StringUtils.substringAfter(q, QUALIFIER_SEPARATOR);
|
186
|
|
187
|
ensureSummaryBuilder(converters, targetId);
|
188
|
converters.get(targetId).addRelation(rel.build());
|
189
|
});
|
190
|
}
|
191
|
|
192
|
}
|
193
|
|
194
|
private void ensureSummaryBuilder(final Map<String, DNGFDLISummaryConverter> builders, final String rowKey) {
|
195
|
if (!builders.containsKey(rowKey)) {
|
196
|
builders.put(rowKey, new DNGFDLISummaryConverter());
|
197
|
}
|
198
|
}
|
199
|
|
200
|
private void ensureScholixBuilder(final Map<String, List<Scholix>> builders, final String rowKey) {
|
201
|
if (!builders.containsKey(rowKey)) {
|
202
|
builders.put(rowKey, Lists.newLinkedList());
|
203
|
}
|
204
|
}
|
205
|
|
206
|
}
|