Project

General

Profile

1 26600 sandro.lab
package eu.dnetlib.data.transform;
2
3 39431 claudio.at
import java.io.*;
4
import java.util.*;
5 30967 claudio.at
import java.util.Map.Entry;
6 35746 alessia.ba
import java.util.zip.GZIPInputStream;
7 30967 claudio.at
import javax.xml.transform.TransformerConfigurationException;
8
import javax.xml.transform.TransformerFactoryConfigurationError;
9
10 44483 claudio.at
import com.google.common.base.Function;
11
import com.google.common.collect.Iterables;
12 31997 claudio.at
import com.google.common.collect.Lists;
13 30967 claudio.at
import com.google.common.collect.Maps;
14 38025 claudio.at
import com.google.common.collect.Sets;
15 26600 sandro.lab
import com.google.protobuf.InvalidProtocolBufferException;
16 39431 claudio.at
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
17
import eu.dnetlib.data.mapreduce.util.*;
18 26600 sandro.lab
import eu.dnetlib.data.proto.KindProtos.Kind;
19
import eu.dnetlib.data.proto.OafProtos.Oaf;
20 30967 claudio.at
import eu.dnetlib.data.proto.TypeProtos.Type;
21 26600 sandro.lab
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
22 39431 claudio.at
import org.apache.commons.io.IOUtils;
23
import org.apache.commons.lang.StringUtils;
24 40063 alessia.ba
import org.apache.commons.logging.Log;
25
import org.apache.commons.logging.LogFactory;
26 39431 claudio.at
import org.dom4j.Document;
27
import org.dom4j.DocumentException;
28
import org.dom4j.io.SAXReader;
29
import org.json.JSONObject;
30
import org.junit.Before;
31 48702 claudio.at
import org.junit.Ignore;
32 39431 claudio.at
import org.junit.Test;
33 26600 sandro.lab
34 39431 claudio.at
import static org.junit.Assert.*;
35
36 26600 sandro.lab
public class XsltRowTransformerFactoryTest {
37
38 40063 alessia.ba
	private static final Log log = LogFactory.getLog(XsltRowTransformerFactoryTest.class);
39 52562 alessia.ba
	private static String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/2hbase/";
40 26600 sandro.lab
	private XsltRowTransformerFactory factory;
41 30967 claudio.at
	private EntityConfigTable entityConfigTable;
42 26600 sandro.lab
43
	@Before
44
	public void setUp() throws Exception {
45
		factory = new XsltRowTransformerFactory();
46 30967 claudio.at
		entityConfigTable = IndexConfig.load(IndexConfigTest.config).getConfigMap();
47 26600 sandro.lab
	}
48
49
	@Test
50 48702 claudio.at
	@Ignore // need to reimplement because claimUpdates_2_hbase.xsl was removed
51 42534 alessia.ba
	public void testParseOafClaimUpdate() throws Exception {
52
		doTest(loadFromTransformationProfile("claimUpdates_2_hbase.xsl"), load("recordClaimUpdate.xml"));
53
	}
54
55
	@Test
56 48702 claudio.at
	@Ignore // need to reimplement because claimUpdates_2_hbase.xsl was removed
57 39616 claudio.at
	public void testParseClaimUpdate() throws Exception {
58
59 41468 claudio.at
		final List<Row> rows = Lists.newArrayList();
60
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaim.xml")));
61
		rows.addAll(asRows(loadFromTransformationProfile("claimUpdates_2_hbase.xsl"), load("recordClaimUpdate.xml")));
62
63
		printAll(mapAll(buildTable(rows)));
64 39616 claudio.at
	}
65
66
	@Test
67 52543 alessia.ba
	public void testParseClaimRel() throws Exception {
68
69
		doTest(loadFromTransformationProfile("claimRels_2_hbase.xml"), load("recordClaimRel.xml"));
70
	}
71
72
73
	@Test
74 49718 claudio.at
	public void testParseFp7IctPUB() throws Exception {
75
76
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("ec_fp7_ict.xml"));
77
	}
78
79
	@Test
80 52524 claudio.at
	public void testParseRecordCrossref() throws Exception {
81
82
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordCrossref.xml"));
83
	}
84
85
	@Test
86 38586 claudio.at
	public void testParseDatasetPUB() throws Exception {
87
88 40063 alessia.ba
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatasetPUB.xml"));
89 38586 claudio.at
	}
90
91
	@Test
92 52422 claudio.at
	public void testParseSoftwareEgiApp() throws Exception {
93
94
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("softwareEgiApp.xml"));
95
	}
96
97
	@Test
98
	public void testParseSoftwareEgiApp2() throws Exception {
99
100
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("softwareEgiApp2.xml"));
101
	}
102
103
	@Test
104
	public void testParseOrpEgiApp() throws Exception {
105
106
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("orpEgiApp.xml"));
107
	}
108
109
	@Test
110 48854 claudio.at
	public void testParseDatasetLindat() throws Exception {
111
112
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("datasetLindat.xml"));
113
	}
114
115
	@Test
116 31997 claudio.at
	public void testParseClaim() throws Exception {
117 26600 sandro.lab
118 40063 alessia.ba
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaim.xml"));
119 30967 claudio.at
	}
120 26600 sandro.lab
121 30967 claudio.at
	@Test
122 42825 alessia.ba
	public void testParseClaimDataset() throws Exception {
123
124
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordClaimDataset.xml"));
125
	}
126
127 52543 alessia.ba
128
129 42825 alessia.ba
	@Test
130 39431 claudio.at
	public void testParseACM() throws Exception {
131
132 40063 alessia.ba
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordACM.xml"));
133 39431 claudio.at
	}
134
135
	@Test
136 39616 claudio.at
	public void testParseASB() throws Exception {
137
138 40063 alessia.ba
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordASB.xml"));
139 39616 claudio.at
	}
140
141
	@Test
142 33382 claudio.at
	public void testParseProjectCorda() throws Exception {
143 26600 sandro.lab
144 40205 claudio.at
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml"));
145 26600 sandro.lab
	}
146
147
	@Test
148 33382 claudio.at
	public void testParseProjectFCT() throws Exception {
149
150 40205 claudio.at
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordFCT.xml"));
151 33382 claudio.at
	}
152
153 46587 alessia.ba
154 33382 claudio.at
	@Test
155 40205 claudio.at
	public void testParseOaf() throws Exception {
156 26600 sandro.lab
157 40063 alessia.ba
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml"));
158 30967 claudio.at
	}
159 26600 sandro.lab
160 30967 claudio.at
	@Test
161 40205 claudio.at
	public void testParseOafPublication() throws Exception {
162
163
		doTest(loadFromTransformationProfile("oaf_publication2hbase.xml"), load("record.xml"));
164
	}
165
166
	@Test
167 43558 claudio.at
	public void testParseLindat() throws Exception {
168
169
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordLindat.xml"));
170
	}
171
172
	@Test
173 31997 claudio.at
	public void testParseDatacite() throws Exception {
174 26600 sandro.lab
175 40063 alessia.ba
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite.xml"));
176 30967 claudio.at
	}
177 26600 sandro.lab
178 31997 claudio.at
	@Test
179 33382 claudio.at
	public void testParseDatacite2() throws Exception {
180
181 40063 alessia.ba
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite2.xml"));
182 33382 claudio.at
	}
183
184
	@Test
185 42495 alessia.ba
	public void testParseOpenTrials() throws Exception {
186
187
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("opentrials_datacite1.xml"));
188
	}
189
190
	@Test
191 32094 claudio.at
	public void testLinkPangaea() throws Exception {
192 26600 sandro.lab
193 34438 claudio.at
		final List<Row> rows = Lists.newArrayList();
194 40205 claudio.at
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("pangaeODF1.xml")));
195
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("pangaeODF2.xml")));
196 40063 alessia.ba
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("pangaeOAF.xml")));
197 40205 claudio.at
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCordaPangaea.xml")));
198 31997 claudio.at
199 38025 claudio.at
		printAll(mapAll(buildTable(rows)));
200 31997 claudio.at
	}
201
202 33382 claudio.at
	@Test
203 43795 alessia.ba
	public void testPangaea() throws Exception {
204
205
		final List<Row> rows = Lists.newArrayList();
206
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("pangaeOAF2.xml")));
207
		printAll(mapAll(buildTable(rows)));
208
	}
209 45034 alessia.ba
	@Test
210
	public void testZenodo() throws Exception {
211 43795 alessia.ba
212 45034 alessia.ba
		final List<Row> rows = Lists.newArrayList();
213
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("zenodoData.xml")));
214
		printAll(mapAll(buildTable(rows)));
215
	}
216
217 43795 alessia.ba
	@Test
218 52193 alessia.ba
	public void testZenodoSoftware() throws Exception {
219
220
		final List<Row> rows = Lists.newArrayList();
221
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("softwareZenodo_odf.xml")));
222
		printAll(mapAll(buildTable(rows)));
223
	}
224
225
	@Test
226 35179 michele.ar
	public void testLinkCorda() throws Exception {
227
228
		final List<Row> rows = Lists.newArrayList();
229 40205 claudio.at
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
230 40063 alessia.ba
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordCorda.xml")));
231 35179 michele.ar
232 38025 claudio.at
		printAll(mapAll(buildTable(rows)));
233 35179 michele.ar
	}
234
235
	@Test
236 33382 claudio.at
	public void testLinkFCT() throws Exception {
237
238 34438 claudio.at
		final List<Row> rows = Lists.newArrayList();
239 40205 claudio.at
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordFCT.xml")));
240 40063 alessia.ba
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordFCT.xml")));
241 33382 claudio.at
242 38025 claudio.at
		printAll(mapAll(buildTable(rows)));
243 33382 claudio.at
	}
244
245
	@Test
246 41468 claudio.at
	public void testLinkARC() throws Exception {
247
248
		final List<Row> rows = Lists.newArrayList();
249
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordARC.xml")));
250
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordARC.xml")));
251
252
		printAll(mapAll(buildTable(rows)));
253
	}
254
255
	@Test
256 33382 claudio.at
	public void testLinkWT() throws Exception {
257
258 34438 claudio.at
		final List<Row> rows = Lists.newArrayList();
259 40205 claudio.at
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordWT.xml")));
260 40063 alessia.ba
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordWT.xml")));
261 33382 claudio.at
262 38025 claudio.at
		printAll(mapAll(buildTable(rows)));
263 33382 claudio.at
	}
264
265 43169 alessia.ba
266
267 34438 claudio.at
	@Test
268
	public void testLinkOrganization() throws Exception {
269
270
		final List<Row> rows = Lists.newArrayList();
271 40205 claudio.at
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organization.xml")));
272
		rows.addAll(asRows(loadFromTransformationProfile("projectorganization_2_hbase.xsl"), load("project_organization.xml")));
273
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
274 34438 claudio.at
275 38025 claudio.at
		printAll(mapAll(buildTable(rows)));
276 34438 claudio.at
	}
277
278 35746 alessia.ba
	@Test
279 46587 alessia.ba
	public void testLinkOrganizationAffiliation() throws Exception {
280
281
		final List<Row> rows = Lists.newArrayList();
282
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organization.xml")));
283
		rows.addAll(asRows(loadFromTransformationProfile("resultorganization_2_hbase.xsl"), load("result_organization.xml")));
284
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml")));
285
286
		printAll(mapAll(buildTable(rows)));
287
	}
288
289
	@Test
290 44483 claudio.at
	public void testDuplicates() throws Exception {
291
		final String mergeId = "50|dedup_wf_001::08ed625d07e5738b794ff14d6773fd9f";
292
		final List<Row> rows = Lists.newArrayList();
293
294 49029 claudio.at
		final Function<Row, Row> f = rowIn -> {
295 44483 claudio.at
296 49029 claudio.at
			final List<Column<String,byte[]>> cols = Lists.newArrayList();
297
			for(Column<String,byte[]> col : rowIn.getColumns()) {
298
				if (col.getName().equals("body")) {
299
					cols.add(new Column(col.getName(), col.getValue()));
300 44483 claudio.at
301
				}
302
			}
303 49029 claudio.at
			return new Row("result", rowIn.getKey(), cols);
304 44483 claudio.at
		};
305
306
		final List<Row> puma1 = asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordPuma1.xml"), f);
307
		puma1.add(new Row("resultResult_dedup_isMergedIn", mergeId));
308
309
		final List<Row> puma2 = asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordPuma2.xml"), f);
310
		puma2.add(new Row("resultResult_dedup_isMergedIn", mergeId));
311
312
		rows.addAll(puma1);
313
		rows.addAll(puma2);
314
315
		List<Oaf> duplicates = Lists.newArrayList();
316
		duplicates.add(getOafBody(puma1));
317
		duplicates.add(getOafBody(puma2));
318
		final Oaf.Builder oafMerge = OafEntityMerger.merge(mergeId, duplicates);
319
320 48697 claudio.at
		final Row mergeRow = new Row("result", mergeId, Lists.newArrayList(new Column("body", oafMerge.build().toByteArray())));
321 44483 claudio.at
322
		rows.add(mergeRow);
323
324
		printAll(mapAll(buildTable(rows)));
325
	}
326
327
	private Oaf getOafBody(final List<Row> rows) throws InvalidProtocolBufferException {
328
		for(Row row : rows) {
329
			if(StringUtils.startsWith(row.getKey(), "50")) {
330
				return Oaf.parseFrom(row.getColumn("body").getValue());
331
332
			}
333
		}
334
		return null;
335
	}
336
337
	@Test
338 35746 alessia.ba
	public void testParseDoajOAF() throws Exception {
339
340 40063 alessia.ba
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("doajUniversityRecord.xml"));
341 35746 alessia.ba
	}
342
343 39888 alessia.ba
	@Test
344
	public void testParseDatasource() throws Exception {
345
346 40205 claudio.at
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("datasourceNative.xml"));
347 39888 alessia.ba
	}
348 44899 alessia.ba
	@Test
349
	public void testParseDatasourcePiwik() throws Exception {
350 39888 alessia.ba
351 44899 alessia.ba
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("datasourcePiwik.xml"));
352
	}
353
354 40205 claudio.at
	@Test
355 40341 alessia.ba
	public void testParseDataDatasource() throws Exception {
356
357
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("dataDatasource.xml"));
358
	}
359
360
	@Test
361 36164 claudio.at
	public void testFromMongodbCompressedDump() throws Exception {
362 40063 alessia.ba
		doTestJsonGz(loadFromTransformationProfile("oaf2hbase.xml"), load("mdstore_cleaned.json.gz"));
363 35746 alessia.ba
	}
364
365 40205 claudio.at
	@Test
366
	public void testLoadFromTransformationProfile() throws IOException {
367
		InputStream in = loadFromTransformationProfile("oaf2hbase.xml");
368
		log.info(IOUtils.toString(in));
369
	}
370
371
	@Test
372
	public void test_template() throws Exception {
373
		final String xslt = IOUtils.toString(loadFromTransformationProfile("oaf2hbase.xml"));
374
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
375
		assertNotNull(transformer);
376
377
		final String record = IOUtils.toString(load("record.xml"));
378
		final List<Row> rows = transformer.apply(record);
379
380
		System.out.println(rows);
381
	}
382
383
	@Test
384
	public void testWrongCharsOrganization() throws Exception {
385
		final List<Row> rows = Lists.newArrayList();
386
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organizationWrongChars.xml")));
387
		printAll(mapAll(buildTable(rows)));
388
	}
389
390 43169 alessia.ba
	@Test
391
	public void testParseProjectWithFunderOriginalName() throws Exception {
392
393
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectWithFunderOriginalName.xml"));
394
	}
395
	@Test
396
	public void testLinkFunderOriginalName() throws Exception {
397
398
		final List<Row> rows = Lists.newArrayList();
399
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectWithFunderOriginalName.xml")));
400
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordFunderOriginalName.xml")));
401
402
		printAll(mapAll(buildTable(rows)));
403
	}
404
405 44070 alessia.ba
	@Test
406
	public void testProjectExtraInfo() throws Exception {
407
		final List<Row> rows = Lists.newArrayList();
408
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordExtraInfo.xml")));
409
		printAll(mapAll(buildTable(rows)));
410
	}
411
412 48697 claudio.at
	@Test
413
	public void testParseSoftwareFromODF() throws Exception {
414
		final List<Row> rows = Lists.newArrayList();
415
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("softwareODF.xml")));
416
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
417
		printAll(mapAll(buildTable(rows)));
418
	}
419
420 52212 alessia.ba
	@Test
421 48697 claudio.at
	public void testParseSoftwareFromOAF() throws Exception {
422
		final List<Row> rows = Lists.newArrayList();
423 52212 alessia.ba
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordOAFsoftwareCLOSED.xml")));
424 48697 claudio.at
		printAll(mapAll(buildTable(rows)));
425
	}
426
427 52212 alessia.ba
	@Test
428
	public void testParseSoftwareFromOAFOpen() throws Exception {
429
		final List<Row> rows = Lists.newArrayList();
430
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordOAFsoftwareOPEN.xml")));
431
		printAll(mapAll(buildTable(rows)));
432
	}
433
434 52275 alessia.ba
	@Test
435
	public void testParseOafWithExternalRef() throws Exception {
436
		final List<Row> rows = Lists.newArrayList();
437
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("oafWithExternalReference.xml")));
438
		printAll(mapAll(buildTable(rows)));
439
	}
440
441 52277 alessia.ba
	@Test
442
	public void testParseOafWithCommunity() throws Exception {
443
		final List<Row> rows = Lists.newArrayList();
444
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("oafWithCommunity.xml")));
445
		printAll(mapAll(buildTable(rows)));
446
	}
447
448 31997 claudio.at
	private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception {
449
		try {
450 34438 claudio.at
			final List<Row> rows = asRows(xsltStream, recordStream);
451 31997 claudio.at
452 40063 alessia.ba
			log.info(rows);
453 26600 sandro.lab
454 34438 claudio.at
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTable(rows);
455 26600 sandro.lab
456 30967 claudio.at
			// System.out.println("\n" + table.toString());
457 26600 sandro.lab
458 34438 claudio.at
			final Map<String, XmlRecordFactory> builders = mapAll(table);
459 28094 claudio.at
460 38025 claudio.at
			printAll(builders);
461 34438 claudio.at
		} catch (final InvalidProtocolBufferException e) {
462 31997 claudio.at
			throw new Exception(e);
463 34438 claudio.at
		} catch (final TransformerConfigurationException e) {
464 31997 claudio.at
			throw new Exception(e);
465 34438 claudio.at
		} catch (final TransformerFactoryConfigurationError e) {
466 31997 claudio.at
			throw new Exception(e);
467 34438 claudio.at
		} catch (final DocumentException e) {
468 31997 claudio.at
			throw new Exception(e);
469 26600 sandro.lab
		}
470
	}
471
472 35746 alessia.ba
	private void doTestJsonGz(final InputStream xsltStream, final InputStream recordStream) throws Exception {
473
474 36164 claudio.at
		final Iterator<List<Row>> rowsIterator = asRowsJsonGzip(xsltStream, recordStream);
475 35746 alessia.ba
476 36164 claudio.at
		int i = 0;
477
		while (rowsIterator.hasNext()) {
478
			final List<Row> rows = rowsIterator.next();
479
			i++;
480 35746 alessia.ba
481 36164 claudio.at
			if ((i % 10000) == 0) {
482
				System.out.println(i);
483
			}
484 35746 alessia.ba
485 36164 claudio.at
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTableDoaj(rows);
486 35746 alessia.ba
487 36164 claudio.at
			for (final Map<String, Map<String, byte[]>> m : table.values()) {
488
				for (final Map<String, byte[]> mv : m.values()) {
489
					for (final byte[] v : mv.values()) {
490
						final OafDecoder d = OafDecoder.decode(v);
491
						assertNotNull(d);
492
						assertNotNull(d.getOaf());
493
494
						switch (d.getKind()) {
495
						case entity:
496
							assertNotNull(d.getMetadata());
497
							if (d.getOaf().getEntity().getType().equals(Type.result)) {
498
								System.out.println(d.getOaf());
499
							}
500
							break;
501
						case relation:
502
							assertNotNull(d.getRel());
503
							break;
504
						default:
505
							break;
506
						}
507
					}
508
				}
509 35746 alessia.ba
			}
510
		}
511
	}
512
513 44483 claudio.at
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream, final Function<Row, Row> p) throws Exception {
514 48697 claudio.at
		return asRows(xsltStream, new HashMap<>(), recordStream, p);
515 44483 claudio.at
	}
516
517 31997 claudio.at
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream) throws Exception {
518 48697 claudio.at
		return asRows(xsltStream, new HashMap<>(), recordStream);
519 38025 claudio.at
	}
520
521
	private List<Row> asRows(final InputStream xsltStream, final Map<String, Object> params, final InputStream recordStream) throws Exception {
522 44483 claudio.at
		return asRows(xsltStream, params, recordStream, null);
523
	}
524
525
	private List<Row> asRows(final InputStream xsltStream, final Map<String, Object> params, final InputStream recordStream, final Function<Row, Row> p) throws Exception {
526 34438 claudio.at
		final String xslt = IOUtils.toString(xsltStream);
527 38025 claudio.at
		final XsltRowTransformer transformer = factory.getTransformer(xslt, params);
528 31997 claudio.at
		assertNotNull(transformer);
529 26600 sandro.lab
530 34438 claudio.at
		final String record = IOUtils.toString(recordStream);
531
		final List<Row> rows = transformer.apply(record);
532 31997 claudio.at
533
		assertNotNull(rows);
534
		assertFalse(rows.isEmpty());
535 44483 claudio.at
		return p == null ? rows : Lists.newArrayList(Iterables.transform(rows, p));
536 31997 claudio.at
	}
537
538 35746 alessia.ba
	private Iterator<List<Row>> asRowsJsonGzip(final InputStream xsltStream, final InputStream recordStreamJsonGzip) throws Exception {
539
		final String xslt = IOUtils.toString(xsltStream);
540
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
541
		assertNotNull(transformer);
542
		assertNotNull(recordStreamJsonGzip);
543
544 36164 claudio.at
		final GZIPInputStream stream = new GZIPInputStream(recordStreamJsonGzip);
545 35746 alessia.ba
		assertNotNull(stream);
546
		final BufferedReader inStream = new BufferedReader(new InputStreamReader(stream));
547
		assertNotNull(inStream);
548
		return new Iterator<List<Row>>() {
549
550
			String jsonRecord = null;
551
552
			@Override
553
			public boolean hasNext() {
554
				try {
555
					return (jsonRecord = inStream.readLine()) != null;
556 36164 claudio.at
				} catch (final IOException e) {
557 35746 alessia.ba
					throw new RuntimeException(e);
558
				}
559
			}
560
561
			@Override
562
			public List<Row> next() {
563
564 36164 claudio.at
				final JSONObject jsonObj = new JSONObject(jsonRecord);
565
				final String body = jsonObj.getString("body");
566
				try {
567
					assertTrue(StringUtils.isNotBlank(body));
568
					// System.out.println(body);
569
					final List<Row> rows = transformer.apply(body);
570
					assertNotNull(rows);
571
					assertFalse(rows.isEmpty());
572
					return rows;
573
				} catch (final Throwable e) {
574
					System.err.println("error transforming document: " + body);
575
					throw new RuntimeException(e);
576
				}
577 35746 alessia.ba
			}
578
579
			@Override
580
			public void remove() {
581 43394 claudio.at
				throw new UnsupportedOperationException();
582 35746 alessia.ba
			}
583
584
		};
585
586
	}
587
588
	private Map<String, Map<String, Map<String, byte[]>>> buildTableDoaj(final List<Row> rows) throws UnsupportedEncodingException {
589 34438 claudio.at
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
590 31997 claudio.at
591 34438 claudio.at
		for (final Row row : rows) {
592
			final String rowKey = row.getKey();
593
			final String cf = row.getColumnFamily();
594 31997 claudio.at
			if (!table.containsKey(rowKey)) {
595 48697 claudio.at
				table.put(rowKey, new HashMap<>());
596 31997 claudio.at
			}
597
			if (!table.get(rowKey).containsKey(cf)) {
598 48697 claudio.at
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<>());
599 31997 claudio.at
			}
600 34438 claudio.at
			for (final Column<String, byte[]> c : row.getColumns()) {
601 35746 alessia.ba
				// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
602
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
603
				if (cf.equals("result") && c.getName().equals("body")) {
604 36164 claudio.at
					// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
605 35746 alessia.ba
					assertTrue(StringUtils.isNotBlank(new String(c.getValue(), "UTF-8")));
606
				}
607
			}
608
		}
609
		return table;
610
611
	}
612
613
	private Map<String, Map<String, Map<String, byte[]>>> buildTable(final List<Row> rows) throws UnsupportedEncodingException {
614
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
615
616
		for (final Row row : rows) {
617
			final String rowKey = row.getKey();
618
			final String cf = row.getColumnFamily();
619
			if (!table.containsKey(rowKey)) {
620 49029 claudio.at
				table.put(rowKey, new HashMap<>());
621 35746 alessia.ba
			}
622
			if (!table.get(rowKey).containsKey(cf)) {
623 49029 claudio.at
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<>());
624 35746 alessia.ba
			}
625
			for (final Column<String, byte[]> c : row.getColumns()) {
626 31997 claudio.at
				System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
627
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
628 38586 claudio.at
				if (c.getName().equals("body")) {
629 36164 claudio.at
					final String theBody = new String(c.getValue(), "UTF-8");
630 35746 alessia.ba
					assertTrue(StringUtils.isNotBlank(theBody));
631 44070 alessia.ba
					//System.out.println(theBody);
632 35746 alessia.ba
				}
633 31997 claudio.at
			}
634
		}
635
		return table;
636 35746 alessia.ba
637 31997 claudio.at
	}
638
639
	private Map<String, XmlRecordFactory> mapAll(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
640
641 34438 claudio.at
		final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
642
		for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
643 31997 claudio.at
			map(builders, e.getKey(), e.getValue());
644
		}
645
		return builders;
646
	}
647
648 35746 alessia.ba
	// private Map<String, XmlRecordFactory> mapResultsOnly(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
649
	//
650
	// final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
651
	// for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
652
	// final Type type = OafRowKeyDecoder.decode(e.getKey()).getType();
653
	// if (type == Type.result) {
654
	// map(builders, e.getKey(), e.getValue());
655
	// }
656
	// }
657
	// return builders;
658
	// }
659
660 31997 claudio.at
	private void map(final Map<String, XmlRecordFactory> builders, final String rowKey, final Map<String, Map<String, byte[]>> row) throws Exception {
661
662 30967 claudio.at
		final Type type = OafRowKeyDecoder.decode(rowKey).getType();
663 26600 sandro.lab
664 41468 claudio.at
		final Map<String, byte[]> familyMap = row.get(type.toString());
665 26600 sandro.lab
666 41468 claudio.at
		if (familyMap == null) return;
667 26600 sandro.lab
668 41468 claudio.at
		final byte[] bodyB = familyMap.get("body");
669 26600 sandro.lab
670 30967 claudio.at
		if (bodyB != null) {
671
			ensureBuilder(builders, rowKey);
672 41468 claudio.at
673
			final Oaf oaf = UpdateMerger.mergeBodyUpdates(familyMap);
674
675
			final OafDecoder mainEntity = OafDecoder.decode(oaf);
676
677 30967 claudio.at
			builders.get(rowKey).setMainEntity(mainEntity);
678 28094 claudio.at
679 34438 claudio.at
			for (final LinkDescriptor ld : entityConfigTable.getDescriptors(type)) {
680 26600 sandro.lab
681 34438 claudio.at
				final String it = ld.getRelDescriptor().getIt();
682
				final Map<String, byte[]> cols = row.get(it);
683 26600 sandro.lab
684 35746 alessia.ba
				if ((cols != null) && !cols.isEmpty()) {
685 26600 sandro.lab
686 34438 claudio.at
					for (final byte[] oafB : cols.values()) {
687 26600 sandro.lab
688 41468 claudio.at
						final Oaf.Builder relBuilder = Oaf.newBuilder(Oaf.parseFrom(oafB));
689 26600 sandro.lab
690 30967 claudio.at
						if (ld.isSymmetric()) {
691 34438 claudio.at
							final RelDescriptor rd = ld.getRelDescriptor();
692 40205 claudio.at
693
							relBuilder.getRelBuilder().setCachedTarget(mainEntity.getEntity()).setRelType(rd.getRelType()).setSubRelType(rd.getSubRelType());
694 30967 claudio.at
						}
695 26600 sandro.lab
696 40205 claudio.at
						relBuilder.getRelBuilder().setChild(ld.isChild());
697 26600 sandro.lab
698 40314 claudio.at
						final Oaf.Builder oafBuilder = Oaf.newBuilder().setKind(Kind.relation).setLastupdatetimestamp(System.currentTimeMillis());
699 40205 claudio.at
						oafBuilder.mergeFrom(relBuilder.build());
700 26600 sandro.lab
701 40205 claudio.at
						final String targetId = ld.isSymmetric() ? oafBuilder.getRel().getTarget() : oafBuilder.getRel().getSource();
702 30967 claudio.at
						ensureBuilder(builders, targetId);
703 34438 claudio.at
						final OafDecoder decoder = OafDecoder.decode(oafBuilder.build());
704 37334 claudio.at
705 30967 claudio.at
						if (ld.isChild()) {
706 37334 claudio.at
							builders.get(targetId).addChild(type, decoder);
707 30967 claudio.at
						} else {
708 37334 claudio.at
							builders.get(targetId).addRelation(type, decoder);
709 30967 claudio.at
						}
710 37334 claudio.at
					}
711 26600 sandro.lab
712 30967 claudio.at
				}
713
			}
714 26600 sandro.lab
		}
715
716
	}
717
718 38025 claudio.at
	private void printAll(final Map<String, XmlRecordFactory> builders) throws DocumentException {
719
		print(Sets.newHashSet(Type.values()), builders, null);
720
	}
721
722
	private void print(final Set<Type> types, final Map<String, XmlRecordFactory> builders, final Map<Type, Set<String>> xpaths) throws DocumentException {
723
		final SAXReader r = new SAXReader();
724
725 34438 claudio.at
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
726 38025 claudio.at
			final OafRowKeyDecoder kd = OafRowKeyDecoder.decode(e.getKey());
727
728
			if (!e.getValue().isValid()) throw new IllegalArgumentException("invalid builder: " + e.getKey());
729
			if (types.contains(kd.getType())) {
730
				final String val = IndentXmlString.apply(e.getValue().build());
731
732
				if ((xpaths != null) && !xpaths.isEmpty() && (xpaths.get(kd.getType()) != null)) {
733
					final Document doc = r.read(new StringReader(val));
734
735 40063 alessia.ba
					log.debug("\n" + e.getKey());
736 38025 claudio.at
					for (final String xpath : xpaths.get(kd.getType())) {
737 40063 alessia.ba
						log.debug(doc.valueOf(xpath));
738 38025 claudio.at
					}
739
				} else {
740 40063 alessia.ba
					log.info(val);
741 38025 claudio.at
				}
742 31997 claudio.at
			}
743
		}
744
	}
745
746 35746 alessia.ba
	private void printNoIndent(final Map<String, XmlRecordFactory> builders) {
747
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
748
			if (e.getValue().isValid()) {
749 40063 alessia.ba
				log.debug(e.getValue().build());
750 35746 alessia.ba
			} else {
751 40063 alessia.ba
				log.debug("invalid builder: " + e.getKey());
752 35746 alessia.ba
			}
753
		}
754
	}
755
756 31997 claudio.at
	private void ensureBuilder(final Map<String, XmlRecordFactory> builders, final String rowKey) throws Exception {
757 30967 claudio.at
		if (!builders.containsKey(rowKey)) {
758
			builders.put(rowKey, newBuilder());
759 26600 sandro.lab
		}
760
	}
761
762 30967 claudio.at
	private XmlRecordFactory newBuilder() throws TransformerConfigurationException, TransformerFactoryConfigurationError, DocumentException {
763 46587 alessia.ba
		return new XmlRecordFactory(entityConfigTable, ContextMapper.fromXml(Context.xml),
764
				RelClasses.fromJSon(RelClassesTest.relClassesJson), XmlRecordFactoryTest.SCHEMA_LOCATION, true, false, false, XmlRecordFactoryTest.specialDatasourceTypes);
765 30967 claudio.at
	}
766
767 33382 claudio.at
	private InputStream load(final String fileName) {
768
		return getClass().getResourceAsStream(fileName);
769
	}
770
771 40063 alessia.ba
	private InputStream loadFromTransformationProfile(final String profilePath) {
772
		log.info("Loading xslt from: " + basePathProfiles + profilePath);
773
		InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath);
774
		SAXReader saxReader = new SAXReader();
775
		Document doc = null;
776
		try {
777
			doc = saxReader.read(profile);
778
		} catch (DocumentException e) {
779
			e.printStackTrace();
780
			throw new RuntimeException(e);
781
		}
782
		String xslt = doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']").asXML();
783 52422 claudio.at
		//log.info(xslt);
784 40063 alessia.ba
		return IOUtils.toInputStream(xslt);
785
	}
786
787 26600 sandro.lab
}