1
|
package eu.dnetlib.data.transform;
|
2
|
|
3
|
import static org.junit.Assert.assertFalse;
|
4
|
import static org.junit.Assert.assertNotNull;
|
5
|
|
6
|
import java.io.InputStream;
|
7
|
import java.util.HashMap;
|
8
|
import java.util.List;
|
9
|
import java.util.Map;
|
10
|
import java.util.Map.Entry;
|
11
|
|
12
|
import javax.xml.transform.TransformerConfigurationException;
|
13
|
import javax.xml.transform.TransformerFactoryConfigurationError;
|
14
|
|
15
|
import org.apache.commons.io.IOUtils;
|
16
|
import org.dom4j.DocumentException;
|
17
|
import org.junit.Before;
|
18
|
import org.junit.Test;
|
19
|
|
20
|
import com.google.common.collect.Lists;
|
21
|
import com.google.common.collect.Maps;
|
22
|
import com.google.protobuf.InvalidProtocolBufferException;
|
23
|
|
24
|
import eu.dnetlib.data.mapreduce.hbase.index.config.ContextMapper;
|
25
|
import eu.dnetlib.data.mapreduce.hbase.index.config.EntityConfigTable;
|
26
|
import eu.dnetlib.data.mapreduce.hbase.index.config.IndexConfig;
|
27
|
import eu.dnetlib.data.mapreduce.hbase.index.config.IndexConfigTest;
|
28
|
import eu.dnetlib.data.mapreduce.hbase.index.config.LinkDescriptor;
|
29
|
import eu.dnetlib.data.mapreduce.hbase.index.config.RelClasses;
|
30
|
import eu.dnetlib.data.mapreduce.hbase.index.config.RelClassesTest;
|
31
|
import eu.dnetlib.data.mapreduce.util.OafDecoder;
|
32
|
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
|
33
|
import eu.dnetlib.data.mapreduce.util.RelDescriptor;
|
34
|
import eu.dnetlib.data.mapreduce.util.XmlRecordFactory;
|
35
|
import eu.dnetlib.data.mapreduce.util.XmlRecordFactoryTest;
|
36
|
import eu.dnetlib.data.proto.KindProtos.Kind;
|
37
|
import eu.dnetlib.data.proto.OafProtos.Oaf;
|
38
|
import eu.dnetlib.data.proto.OafProtos.OafRel;
|
39
|
import eu.dnetlib.data.proto.TypeProtos.Type;
|
40
|
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
|
41
|
|
42
|
public class XsltRowTransformerFactoryTest {
|
43
|
|
44
|
private XsltRowTransformerFactory factory;
|
45
|
|
46
|
private EntityConfigTable entityConfigTable;
|
47
|
|
48
|
// private final InputStream datacite2insertActionsInputStream =
|
49
|
// getClass().getResourceAsStream("/eu/dnetlib/actionmanager/xslt/datacite2insertActions.xslt");
|
50
|
|
51
|
private final InputStream dmf2hbaseInputStream = getClass().getResourceAsStream("dmf_2_hbase.xsl");
|
52
|
|
53
|
private final InputStream datacite2hbaseInputStream = getClass().getResourceAsStream("datacite_2_hbase.xsl");
|
54
|
|
55
|
private final InputStream project2hbaseInputStream = getClass().getResourceAsStream("projects_2_hbase.xsl");
|
56
|
|
57
|
private final InputStream recordInputStream = getClass().getResourceAsStream("record.xml");
|
58
|
|
59
|
private final InputStream recordDataciteInputStream = getClass().getResourceAsStream("recordDatacite.xml");
|
60
|
|
61
|
private final InputStream projectRecordInputStream = getClass().getResourceAsStream("projectRecord.xml");
|
62
|
|
63
|
private final InputStream recordClaimInputStream = getClass().getResourceAsStream("recordClaim.xml");
|
64
|
|
65
|
private final InputStream recordPangaeOafInputStream = getClass().getResourceAsStream("pangaeOAF.xml");
|
66
|
|
67
|
private final InputStream recordPangaeOdfInputStream = getClass().getResourceAsStream("pangaeODF.xml");
|
68
|
|
69
|
@Before
|
70
|
public void setUp() throws Exception {
|
71
|
factory = new XsltRowTransformerFactory();
|
72
|
entityConfigTable = IndexConfig.load(IndexConfigTest.config).getConfigMap();
|
73
|
}
|
74
|
|
75
|
@Test
|
76
|
public void testParseClaim() throws Exception {
|
77
|
|
78
|
doTest(dmf2hbaseInputStream, recordClaimInputStream);
|
79
|
}
|
80
|
|
81
|
@Test
|
82
|
public void testParseProjectWithStats() throws Exception {
|
83
|
|
84
|
doTest(project2hbaseInputStream, projectRecordInputStream);
|
85
|
}
|
86
|
|
87
|
@Test
|
88
|
public void testParseDmf() throws Exception {
|
89
|
|
90
|
doTest(dmf2hbaseInputStream, recordInputStream);
|
91
|
}
|
92
|
|
93
|
@Test
|
94
|
public void testParseDatacite() throws Exception {
|
95
|
|
96
|
doTest(datacite2hbaseInputStream, recordDataciteInputStream);
|
97
|
}
|
98
|
|
99
|
@Test
|
100
|
public void testParsePangaeOAF() throws Exception {
|
101
|
|
102
|
doTest(dmf2hbaseInputStream, recordPangaeOafInputStream);
|
103
|
}
|
104
|
|
105
|
@Test
|
106
|
public void testParsePangaeODF() throws Exception {
|
107
|
|
108
|
doTest(datacite2hbaseInputStream, recordPangaeOdfInputStream);
|
109
|
}
|
110
|
|
111
|
@Test
|
112
|
public void testLinkPangae() throws Exception {
|
113
|
|
114
|
List<Row> rows = Lists.newArrayList();
|
115
|
rows.addAll(asRows(datacite2hbaseInputStream, recordPangaeOdfInputStream));
|
116
|
rows.addAll(asRows(dmf2hbaseInputStream, recordPangaeOafInputStream));
|
117
|
|
118
|
print(mapAll(buildTable(rows)));
|
119
|
}
|
120
|
|
121
|
private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception {
|
122
|
try {
|
123
|
List<Row> rows = asRows(xsltStream, recordStream);
|
124
|
|
125
|
// System.out.println(rows);
|
126
|
|
127
|
Map<String, Map<String, Map<String, byte[]>>> table = buildTable(rows);
|
128
|
|
129
|
// System.out.println("\n" + table.toString());
|
130
|
|
131
|
Map<String, XmlRecordFactory> builders = mapAll(table);
|
132
|
|
133
|
print(builders);
|
134
|
} catch (InvalidProtocolBufferException e) {
|
135
|
throw new Exception(e);
|
136
|
} catch (TransformerConfigurationException e) {
|
137
|
throw new Exception(e);
|
138
|
} catch (TransformerFactoryConfigurationError e) {
|
139
|
throw new Exception(e);
|
140
|
} catch (DocumentException e) {
|
141
|
throw new Exception(e);
|
142
|
}
|
143
|
}
|
144
|
|
145
|
private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream) throws Exception {
|
146
|
String xslt = IOUtils.toString(xsltStream);
|
147
|
XsltRowTransformer transformer = factory.getTransformer(xslt);
|
148
|
assertNotNull(transformer);
|
149
|
|
150
|
String record = IOUtils.toString(recordStream);
|
151
|
List<Row> rows = transformer.apply(record);
|
152
|
|
153
|
assertNotNull(rows);
|
154
|
assertFalse(rows.isEmpty());
|
155
|
return rows;
|
156
|
}
|
157
|
|
158
|
private Map<String, Map<String, Map<String, byte[]>>> buildTable(final List<Row> rows) {
|
159
|
Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
|
160
|
|
161
|
for (Row row : rows) {
|
162
|
String rowKey = row.getKey();
|
163
|
String cf = row.getColumnFamily();
|
164
|
if (!table.containsKey(rowKey)) {
|
165
|
table.put(rowKey, new HashMap<String, Map<String, byte[]>>());
|
166
|
}
|
167
|
if (!table.get(rowKey).containsKey(cf)) {
|
168
|
table.get(rowKey).put(row.getColumnFamily(), new HashMap<String, byte[]>());
|
169
|
}
|
170
|
for (Column<String, byte[]> c : row.getColumns()) {
|
171
|
System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
|
172
|
table.get(rowKey).get(cf).put(c.getName(), c.getValue());
|
173
|
}
|
174
|
}
|
175
|
return table;
|
176
|
}
|
177
|
|
178
|
private Map<String, XmlRecordFactory> mapAll(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
|
179
|
|
180
|
Map<String, XmlRecordFactory> builders = Maps.newHashMap();
|
181
|
for (Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
|
182
|
map(builders, e.getKey(), e.getValue());
|
183
|
}
|
184
|
return builders;
|
185
|
}
|
186
|
|
187
|
private void map(final Map<String, XmlRecordFactory> builders, final String rowKey, final Map<String, Map<String, byte[]>> row) throws Exception {
|
188
|
|
189
|
final Type type = OafRowKeyDecoder.decode(rowKey).getType();
|
190
|
|
191
|
Map<String, byte[]> colEntity = row.get(type.toString());
|
192
|
|
193
|
if (colEntity == null) return;
|
194
|
|
195
|
byte[] bodyB = colEntity.get("body");
|
196
|
|
197
|
if (bodyB != null) {
|
198
|
ensureBuilder(builders, rowKey);
|
199
|
OafDecoder mainEntity = OafDecoder.decode(Oaf.parseFrom(bodyB));
|
200
|
builders.get(rowKey).setMainEntity(mainEntity);
|
201
|
|
202
|
for (LinkDescriptor ld : entityConfigTable.getDescriptors(type)) {
|
203
|
|
204
|
String it = ld.getRelDescriptor().getIt();
|
205
|
Map<String, byte[]> cols = row.get(it);
|
206
|
|
207
|
if ((cols != null) && (!cols.isEmpty())) {
|
208
|
|
209
|
for (byte[] oafB : cols.values()) {
|
210
|
Oaf oaf = Oaf.parseFrom(oafB);
|
211
|
|
212
|
OafRel.Builder relBuilder = OafRel.newBuilder(oaf.getRel());
|
213
|
|
214
|
if (ld.isSymmetric()) {
|
215
|
RelDescriptor rd = ld.getRelDescriptor();
|
216
|
relBuilder.setCachedTarget(mainEntity.getEntity()).setRelType(rd.getRelType()).setSubRelType(rd.getSubRelType());
|
217
|
}
|
218
|
|
219
|
OafRel oafRel = relBuilder.setChild(ld.isChild()).build();
|
220
|
|
221
|
final Oaf.Builder oafBuilder = Oaf.newBuilder().setKind(Kind.relation).setTimestamp(System.currentTimeMillis());
|
222
|
oafBuilder.getRelBuilder().mergeFrom(oafRel);
|
223
|
|
224
|
String targetId = ld.isSymmetric() ? oafRel.getTarget() : oafRel.getSource();
|
225
|
ensureBuilder(builders, targetId);
|
226
|
OafDecoder decoder = OafDecoder.decode(oafBuilder.build());
|
227
|
if (ld.isChild()) {
|
228
|
builders.get(targetId).addChild(decoder);
|
229
|
} else {
|
230
|
builders.get(targetId).addRelation(decoder);
|
231
|
}
|
232
|
|
233
|
}
|
234
|
}
|
235
|
}
|
236
|
}
|
237
|
|
238
|
}
|
239
|
|
240
|
private void print(final Map<String, XmlRecordFactory> builders) {
|
241
|
for (Entry<String, XmlRecordFactory> e : builders.entrySet()) {
|
242
|
if (e.getValue().isValid()) {
|
243
|
System.out.println(IndentXmlString.apply(e.getValue().build()));
|
244
|
} else {
|
245
|
System.out.println("invalid builder: " + e.getKey());
|
246
|
}
|
247
|
}
|
248
|
}
|
249
|
|
250
|
private void ensureBuilder(final Map<String, XmlRecordFactory> builders, final String rowKey) throws Exception {
|
251
|
if (!builders.containsKey(rowKey)) {
|
252
|
builders.put(rowKey, newBuilder());
|
253
|
}
|
254
|
}
|
255
|
|
256
|
private XmlRecordFactory newBuilder() throws TransformerConfigurationException, TransformerFactoryConfigurationError, DocumentException {
|
257
|
return new XmlRecordFactory(entityConfigTable, ContextMapper.fromXml(eu.dnetlib.data.mapreduce.hbase.index.config.Context.xml),
|
258
|
RelClasses.fromJSon(RelClassesTest.relClassesJson), XmlRecordFactoryTest.SCHEMA_LOCATION, true, false, false);
|
259
|
}
|
260
|
|
261
|
@Test
|
262
|
public void test_template() throws Exception {
|
263
|
String xslt = IOUtils.toString(dmf2hbaseInputStream);
|
264
|
XsltRowTransformer transformer = factory.getTransformer(xslt);
|
265
|
assertNotNull(transformer);
|
266
|
|
267
|
String record = IOUtils.toString(recordInputStream);
|
268
|
List<Row> rows = transformer.apply(record);
|
269
|
|
270
|
System.out.println(rows);
|
271
|
}
|
272
|
|
273
|
}
|