Project

General

Profile

1
package eu.dnetlib.data.transform;
2

    
3
import static org.junit.Assert.assertFalse;
4
import static org.junit.Assert.assertNotNull;
5

    
6
import java.io.InputStream;
7
import java.util.HashMap;
8
import java.util.List;
9
import java.util.Map;
10
import java.util.Map.Entry;
11

    
12
import javax.xml.transform.TransformerConfigurationException;
13
import javax.xml.transform.TransformerFactoryConfigurationError;
14

    
15
import org.apache.commons.io.IOUtils;
16
import org.dom4j.DocumentException;
17
import org.junit.Before;
18
import org.junit.Test;
19

    
20
import com.google.common.collect.Lists;
21
import com.google.common.collect.Maps;
22
import com.google.protobuf.InvalidProtocolBufferException;
23

    
24
import eu.dnetlib.data.mapreduce.hbase.index.config.ContextMapper;
25
import eu.dnetlib.data.mapreduce.hbase.index.config.EntityConfigTable;
26
import eu.dnetlib.data.mapreduce.hbase.index.config.IndexConfig;
27
import eu.dnetlib.data.mapreduce.hbase.index.config.IndexConfigTest;
28
import eu.dnetlib.data.mapreduce.hbase.index.config.LinkDescriptor;
29
import eu.dnetlib.data.mapreduce.hbase.index.config.RelClasses;
30
import eu.dnetlib.data.mapreduce.hbase.index.config.RelClassesTest;
31
import eu.dnetlib.data.mapreduce.util.OafDecoder;
32
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
33
import eu.dnetlib.data.mapreduce.util.RelDescriptor;
34
import eu.dnetlib.data.mapreduce.util.XmlRecordFactory;
35
import eu.dnetlib.data.mapreduce.util.XmlRecordFactoryTest;
36
import eu.dnetlib.data.proto.KindProtos.Kind;
37
import eu.dnetlib.data.proto.OafProtos.Oaf;
38
import eu.dnetlib.data.proto.OafProtos.OafRel;
39
import eu.dnetlib.data.proto.TypeProtos.Type;
40
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
41

    
42
public class XsltRowTransformerFactoryTest {
43

    
44
	private XsltRowTransformerFactory factory;
45

    
46
	private EntityConfigTable entityConfigTable;
47

    
48
	// private final InputStream datacite2insertActionsInputStream =
49
	// getClass().getResourceAsStream("/eu/dnetlib/actionmanager/xslt/datacite2insertActions.xslt");
50

    
51
	private final InputStream dmf2hbaseInputStream = getClass().getResourceAsStream("dmf_2_hbase.xsl");
52

    
53
	private final InputStream datacite2hbaseInputStream = getClass().getResourceAsStream("datacite_2_hbase.xsl");
54

    
55
	private final InputStream project2hbaseInputStream = getClass().getResourceAsStream("projects_2_hbase.xsl");
56

    
57
	private final InputStream recordInputStream = getClass().getResourceAsStream("record.xml");
58

    
59
	private final InputStream recordDataciteInputStream = getClass().getResourceAsStream("recordDatacite.xml");
60

    
61
	private final InputStream projectRecordInputStream = getClass().getResourceAsStream("projectRecord.xml");
62

    
63
	private final InputStream recordClaimInputStream = getClass().getResourceAsStream("recordClaim.xml");
64

    
65
	private final InputStream recordPangaeOafInputStream = getClass().getResourceAsStream("pangaeOAF.xml");
66

    
67
	private final InputStream recordPangaeOdfInputStream = getClass().getResourceAsStream("pangaeODF.xml");
68

    
69
	@Before
70
	public void setUp() throws Exception {
71
		factory = new XsltRowTransformerFactory();
72
		entityConfigTable = IndexConfig.load(IndexConfigTest.config).getConfigMap();
73
	}
74

    
75
	@Test
76
	public void testParseClaim() throws Exception {
77

    
78
		doTest(dmf2hbaseInputStream, recordClaimInputStream);
79
	}
80

    
81
	@Test
82
	public void testParseProjectWithStats() throws Exception {
83

    
84
		doTest(project2hbaseInputStream, projectRecordInputStream);
85
	}
86

    
87
	@Test
88
	public void testParseDmf() throws Exception {
89

    
90
		doTest(dmf2hbaseInputStream, recordInputStream);
91
	}
92

    
93
	@Test
94
	public void testParseDatacite() throws Exception {
95

    
96
		doTest(datacite2hbaseInputStream, recordDataciteInputStream);
97
	}
98

    
99
	@Test
100
	public void testParsePangaeOAF() throws Exception {
101

    
102
		doTest(dmf2hbaseInputStream, recordPangaeOafInputStream);
103
	}
104

    
105
	@Test
106
	public void testParsePangaeODF() throws Exception {
107

    
108
		doTest(datacite2hbaseInputStream, recordPangaeOdfInputStream);
109
	}
110

    
111
	@Test
112
	public void testLinkPangae() throws Exception {
113

    
114
		List<Row> rows = Lists.newArrayList();
115
		rows.addAll(asRows(datacite2hbaseInputStream, recordPangaeOdfInputStream));
116
		rows.addAll(asRows(dmf2hbaseInputStream, recordPangaeOafInputStream));
117

    
118
		print(mapAll(buildTable(rows)));
119
	}
120

    
121
	private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception {
122
		try {
123
			List<Row> rows = asRows(xsltStream, recordStream);
124

    
125
			// System.out.println(rows);
126

    
127
			Map<String, Map<String, Map<String, byte[]>>> table = buildTable(rows);
128

    
129
			// System.out.println("\n" + table.toString());
130

    
131
			Map<String, XmlRecordFactory> builders = mapAll(table);
132

    
133
			print(builders);
134
		} catch (InvalidProtocolBufferException e) {
135
			throw new Exception(e);
136
		} catch (TransformerConfigurationException e) {
137
			throw new Exception(e);
138
		} catch (TransformerFactoryConfigurationError e) {
139
			throw new Exception(e);
140
		} catch (DocumentException e) {
141
			throw new Exception(e);
142
		}
143
	}
144

    
145
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream) throws Exception {
146
		String xslt = IOUtils.toString(xsltStream);
147
		XsltRowTransformer transformer = factory.getTransformer(xslt);
148
		assertNotNull(transformer);
149

    
150
		String record = IOUtils.toString(recordStream);
151
		List<Row> rows = transformer.apply(record);
152

    
153
		assertNotNull(rows);
154
		assertFalse(rows.isEmpty());
155
		return rows;
156
	}
157

    
158
	private Map<String, Map<String, Map<String, byte[]>>> buildTable(final List<Row> rows) {
159
		Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
160

    
161
		for (Row row : rows) {
162
			String rowKey = row.getKey();
163
			String cf = row.getColumnFamily();
164
			if (!table.containsKey(rowKey)) {
165
				table.put(rowKey, new HashMap<String, Map<String, byte[]>>());
166
			}
167
			if (!table.get(rowKey).containsKey(cf)) {
168
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<String, byte[]>());
169
			}
170
			for (Column<String, byte[]> c : row.getColumns()) {
171
				System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
172
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
173
			}
174
		}
175
		return table;
176
	}
177

    
178
	private Map<String, XmlRecordFactory> mapAll(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
179

    
180
		Map<String, XmlRecordFactory> builders = Maps.newHashMap();
181
		for (Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
182
			map(builders, e.getKey(), e.getValue());
183
		}
184
		return builders;
185
	}
186

    
187
	private void map(final Map<String, XmlRecordFactory> builders, final String rowKey, final Map<String, Map<String, byte[]>> row) throws Exception {
188

    
189
		final Type type = OafRowKeyDecoder.decode(rowKey).getType();
190

    
191
		Map<String, byte[]> colEntity = row.get(type.toString());
192

    
193
		if (colEntity == null) return;
194

    
195
		byte[] bodyB = colEntity.get("body");
196

    
197
		if (bodyB != null) {
198
			ensureBuilder(builders, rowKey);
199
			OafDecoder mainEntity = OafDecoder.decode(Oaf.parseFrom(bodyB));
200
			builders.get(rowKey).setMainEntity(mainEntity);
201

    
202
			for (LinkDescriptor ld : entityConfigTable.getDescriptors(type)) {
203

    
204
				String it = ld.getRelDescriptor().getIt();
205
				Map<String, byte[]> cols = row.get(it);
206

    
207
				if ((cols != null) && (!cols.isEmpty())) {
208

    
209
					for (byte[] oafB : cols.values()) {
210
						Oaf oaf = Oaf.parseFrom(oafB);
211

    
212
						OafRel.Builder relBuilder = OafRel.newBuilder(oaf.getRel());
213

    
214
						if (ld.isSymmetric()) {
215
							RelDescriptor rd = ld.getRelDescriptor();
216
							relBuilder.setCachedTarget(mainEntity.getEntity()).setRelType(rd.getRelType()).setSubRelType(rd.getSubRelType());
217
						}
218

    
219
						OafRel oafRel = relBuilder.setChild(ld.isChild()).build();
220

    
221
						final Oaf.Builder oafBuilder = Oaf.newBuilder().setKind(Kind.relation).setTimestamp(System.currentTimeMillis());
222
						oafBuilder.getRelBuilder().mergeFrom(oafRel);
223

    
224
						String targetId = ld.isSymmetric() ? oafRel.getTarget() : oafRel.getSource();
225
						ensureBuilder(builders, targetId);
226
						OafDecoder decoder = OafDecoder.decode(oafBuilder.build());
227
						if (ld.isChild()) {
228
							builders.get(targetId).addChild(decoder);
229
						} else {
230
							builders.get(targetId).addRelation(decoder);
231
						}
232

    
233
					}
234
				}
235
			}
236
		}
237

    
238
	}
239

    
240
	private void print(final Map<String, XmlRecordFactory> builders) {
241
		for (Entry<String, XmlRecordFactory> e : builders.entrySet()) {
242
			if (e.getValue().isValid()) {
243
				System.out.println(IndentXmlString.apply(e.getValue().build()));
244
			} else {
245
				System.out.println("invalid builder: " + e.getKey());
246
			}
247
		}
248
	}
249

    
250
	private void ensureBuilder(final Map<String, XmlRecordFactory> builders, final String rowKey) throws Exception {
251
		if (!builders.containsKey(rowKey)) {
252
			builders.put(rowKey, newBuilder());
253
		}
254
	}
255

    
256
	private XmlRecordFactory newBuilder() throws TransformerConfigurationException, TransformerFactoryConfigurationError, DocumentException {
257
		return new XmlRecordFactory(entityConfigTable, ContextMapper.fromXml(eu.dnetlib.data.mapreduce.hbase.index.config.Context.xml),
258
				RelClasses.fromJSon(RelClassesTest.relClassesJson), XmlRecordFactoryTest.SCHEMA_LOCATION, true, false, false);
259
	}
260

    
261
	@Test
262
	public void test_template() throws Exception {
263
		String xslt = IOUtils.toString(dmf2hbaseInputStream);
264
		XsltRowTransformer transformer = factory.getTransformer(xslt);
265
		assertNotNull(transformer);
266

    
267
		String record = IOUtils.toString(recordInputStream);
268
		List<Row> rows = transformer.apply(record);
269

    
270
		System.out.println(rows);
271
	}
272

    
273
}
    (1-1/1)