Project

General

Profile

1 26600 sandro.lab
package eu.dnetlib.data.transform;
2
3 39431 claudio.at
import java.io.*;
4
import java.util.*;
5 30967 claudio.at
import java.util.Map.Entry;
6 35746 alessia.ba
import java.util.zip.GZIPInputStream;
7 30967 claudio.at
import javax.xml.transform.TransformerConfigurationException;
8
import javax.xml.transform.TransformerFactoryConfigurationError;
9
10 44483 claudio.at
import com.google.common.base.Function;
11
import com.google.common.collect.Iterables;
12 31997 claudio.at
import com.google.common.collect.Lists;
13 30967 claudio.at
import com.google.common.collect.Maps;
14 38025 claudio.at
import com.google.common.collect.Sets;
15 26600 sandro.lab
import com.google.protobuf.InvalidProtocolBufferException;
16 53408 claudio.at
import com.googlecode.protobuf.format.JsonFormat;
17
import eu.dnetlib.actionmanager.actions.ActionFactory;
18
import eu.dnetlib.actionmanager.actions.XsltInfoPackageAction;
19
import eu.dnetlib.actionmanager.common.Agent;
20
import eu.dnetlib.actionmanager.common.Operation;
21
import eu.dnetlib.actionmanager.common.Provenance;
22 39431 claudio.at
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
23
import eu.dnetlib.data.mapreduce.util.*;
24 26600 sandro.lab
import eu.dnetlib.data.proto.KindProtos.Kind;
25
import eu.dnetlib.data.proto.OafProtos.Oaf;
26 30967 claudio.at
import eu.dnetlib.data.proto.TypeProtos.Type;
27 26600 sandro.lab
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
28 39431 claudio.at
import org.apache.commons.io.IOUtils;
29
import org.apache.commons.lang.StringUtils;
30 40063 alessia.ba
import org.apache.commons.logging.Log;
31
import org.apache.commons.logging.LogFactory;
32 39431 claudio.at
import org.dom4j.Document;
33
import org.dom4j.DocumentException;
34
import org.dom4j.io.SAXReader;
35
import org.json.JSONObject;
36
import org.junit.Before;
37 48702 claudio.at
import org.junit.Ignore;
38 39431 claudio.at
import org.junit.Test;
39 53408 claudio.at
import org.springframework.core.io.ByteArrayResource;
40
import org.springframework.core.io.Resource;
41 26600 sandro.lab
42 39431 claudio.at
import static org.junit.Assert.*;
43
44 26600 sandro.lab
public class XsltRowTransformerFactoryTest {
45
46 40063 alessia.ba
	private static final Log log = LogFactory.getLog(XsltRowTransformerFactoryTest.class);
47 52562 alessia.ba
	private static String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/2hbase/";
48 26600 sandro.lab
	private XsltRowTransformerFactory factory;
49 30967 claudio.at
	private EntityConfigTable entityConfigTable;
50 26600 sandro.lab
51
	@Before
52
	public void setUp() throws Exception {
53
		factory = new XsltRowTransformerFactory();
54 30967 claudio.at
		entityConfigTable = IndexConfig.load(IndexConfigTest.config).getConfigMap();
55 26600 sandro.lab
	}
56
57
	@Test
58 48702 claudio.at
	@Ignore // need to reimplement because claimUpdates_2_hbase.xsl was removed
59 42534 alessia.ba
	public void testParseOafClaimUpdate() throws Exception {
60
		doTest(loadFromTransformationProfile("claimUpdates_2_hbase.xsl"), load("recordClaimUpdate.xml"));
61
	}
62
63
	@Test
64 48702 claudio.at
	@Ignore // need to reimplement because claimUpdates_2_hbase.xsl was removed
65 39616 claudio.at
	public void testParseClaimUpdate() throws Exception {
66
67 41468 claudio.at
		final List<Row> rows = Lists.newArrayList();
68
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaim.xml")));
69
		rows.addAll(asRows(loadFromTransformationProfile("claimUpdates_2_hbase.xsl"), load("recordClaimUpdate.xml")));
70
71
		printAll(mapAll(buildTable(rows)));
72 39616 claudio.at
	}
73
74
	@Test
75 52543 alessia.ba
	public void testParseClaimRel() throws Exception {
76
77
		doTest(loadFromTransformationProfile("claimRels_2_hbase.xml"), load("recordClaimRel.xml"));
78
	}
79
80
81
	@Test
82 49718 claudio.at
	public void testParseFp7IctPUB() throws Exception {
83
84
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("ec_fp7_ict.xml"));
85
	}
86
87
	@Test
88 53983 claudio.at
	public void testParseEPMC() throws Exception {
89
90
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("epmc.xml"));
91
	}
92
93
	@Test
94 52524 claudio.at
	public void testParseRecordCrossref() throws Exception {
95
96
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordCrossref.xml"));
97
	}
98
99
	@Test
100 38586 claudio.at
	public void testParseDatasetPUB() throws Exception {
101
102 40063 alessia.ba
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatasetPUB.xml"));
103 38586 claudio.at
	}
104
105
	@Test
106 52422 claudio.at
	public void testParseSoftwareEgiApp() throws Exception {
107
108
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("softwareEgiApp.xml"));
109
	}
110
111
	@Test
112
	public void testParseSoftwareEgiApp2() throws Exception {
113
114
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("softwareEgiApp2.xml"));
115
	}
116
117
	@Test
118
	public void testParseOrpEgiApp() throws Exception {
119
120
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("orpEgiApp.xml"));
121
	}
122
123
	@Test
124 48854 claudio.at
	public void testParseDatasetLindat() throws Exception {
125
126
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("datasetLindat.xml"));
127
	}
128
129
	@Test
130 53362 miriam.bag
	public void testParseDatasetNeuroVault() throws Exception {
131
132
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordNeuroVault.xml"));
133
	}
134
135
	@Test
136 31997 claudio.at
	public void testParseClaim() throws Exception {
137 26600 sandro.lab
138 40063 alessia.ba
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaim.xml"));
139 30967 claudio.at
	}
140 26600 sandro.lab
141 30967 claudio.at
	@Test
142 42825 alessia.ba
	public void testParseClaimDataset() throws Exception {
143
144
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordClaimDataset.xml"));
145
	}
146
147 52543 alessia.ba
148 42825 alessia.ba
	@Test
149 39431 claudio.at
	public void testParseACM() throws Exception {
150
151 40063 alessia.ba
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordACM.xml"));
152 39431 claudio.at
	}
153
154
	@Test
155 39616 claudio.at
	public void testParseASB() throws Exception {
156
157 40063 alessia.ba
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordASB.xml"));
158 39616 claudio.at
	}
159
160
	@Test
161 33382 claudio.at
	public void testParseProjectCorda() throws Exception {
162 26600 sandro.lab
163 40205 claudio.at
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml"));
164 26600 sandro.lab
	}
165
166
	@Test
167 33382 claudio.at
	public void testParseProjectFCT() throws Exception {
168
169 40205 claudio.at
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordFCT.xml"));
170 33382 claudio.at
	}
171
172 46587 alessia.ba
173 33382 claudio.at
	@Test
174 40205 claudio.at
	public void testParseOaf() throws Exception {
175 26600 sandro.lab
176 40063 alessia.ba
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml"));
177 30967 claudio.at
	}
178 26600 sandro.lab
179 30967 claudio.at
	@Test
180 40205 claudio.at
	public void testParseOafPublication() throws Exception {
181
182 52980 alessia.ba
		doTest(loadFromTransformationProfile("oaf_entity2hbase.xml"), load("record.xml"));
183 40205 claudio.at
	}
184
185
	@Test
186 43558 claudio.at
	public void testParseLindat() throws Exception {
187
188
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordLindat.xml"));
189
	}
190
191
	@Test
192 31997 claudio.at
	public void testParseDatacite() throws Exception {
193 26600 sandro.lab
194 40063 alessia.ba
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite.xml"));
195 30967 claudio.at
	}
196 26600 sandro.lab
197 31997 claudio.at
	@Test
198 33382 claudio.at
	public void testParseDatacite2() throws Exception {
199
200 40063 alessia.ba
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite2.xml"));
201 33382 claudio.at
	}
202
203
	@Test
204 42495 alessia.ba
	public void testParseOpenTrials() throws Exception {
205
206
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("opentrials_datacite1.xml"));
207
	}
208
209
	@Test
210 32094 claudio.at
	public void testLinkPangaea() throws Exception {
211 26600 sandro.lab
212 34438 claudio.at
		final List<Row> rows = Lists.newArrayList();
213 40205 claudio.at
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("pangaeODF1.xml")));
214
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("pangaeODF2.xml")));
215 40063 alessia.ba
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("pangaeOAF.xml")));
216 40205 claudio.at
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCordaPangaea.xml")));
217 31997 claudio.at
218 38025 claudio.at
		printAll(mapAll(buildTable(rows)));
219 31997 claudio.at
	}
220
221 33382 claudio.at
	@Test
222 43795 alessia.ba
	public void testPangaea() throws Exception {
223
224
		final List<Row> rows = Lists.newArrayList();
225
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("pangaeOAF2.xml")));
226
		printAll(mapAll(buildTable(rows)));
227
	}
228 45034 alessia.ba
	@Test
229
	public void testZenodo() throws Exception {
230 43795 alessia.ba
231 45034 alessia.ba
		final List<Row> rows = Lists.newArrayList();
232
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("zenodoData.xml")));
233
		printAll(mapAll(buildTable(rows)));
234
	}
235
236 43795 alessia.ba
	@Test
237 52193 alessia.ba
	public void testZenodoSoftware() throws Exception {
238
239
		final List<Row> rows = Lists.newArrayList();
240
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("softwareZenodo_odf.xml")));
241
		printAll(mapAll(buildTable(rows)));
242
	}
243
244
	@Test
245 35179 michele.ar
	public void testLinkCorda() throws Exception {
246
247
		final List<Row> rows = Lists.newArrayList();
248 40205 claudio.at
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
249 40063 alessia.ba
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordCorda.xml")));
250 35179 michele.ar
251 38025 claudio.at
		printAll(mapAll(buildTable(rows)));
252 35179 michele.ar
	}
253
254
	@Test
255 33382 claudio.at
	public void testLinkFCT() throws Exception {
256
257 34438 claudio.at
		final List<Row> rows = Lists.newArrayList();
258 40205 claudio.at
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordFCT.xml")));
259 40063 alessia.ba
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordFCT.xml")));
260 33382 claudio.at
261 38025 claudio.at
		printAll(mapAll(buildTable(rows)));
262 33382 claudio.at
	}
263
264
	@Test
265 41468 claudio.at
	public void testLinkARC() throws Exception {
266
267
		final List<Row> rows = Lists.newArrayList();
268
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordARC.xml")));
269
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordARC.xml")));
270
271
		printAll(mapAll(buildTable(rows)));
272
	}
273
274
	@Test
275 33382 claudio.at
	public void testLinkWT() throws Exception {
276
277 34438 claudio.at
		final List<Row> rows = Lists.newArrayList();
278 40205 claudio.at
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordWT.xml")));
279 40063 alessia.ba
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordWT.xml")));
280 33382 claudio.at
281 38025 claudio.at
		printAll(mapAll(buildTable(rows)));
282 33382 claudio.at
	}
283
284 43169 alessia.ba
285
286 34438 claudio.at
	@Test
287
	public void testLinkOrganization() throws Exception {
288
289
		final List<Row> rows = Lists.newArrayList();
290 40205 claudio.at
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organization.xml")));
291
		rows.addAll(asRows(loadFromTransformationProfile("projectorganization_2_hbase.xsl"), load("project_organization.xml")));
292
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
293 34438 claudio.at
294 38025 claudio.at
		printAll(mapAll(buildTable(rows)));
295 34438 claudio.at
	}
296
297 35746 alessia.ba
	@Test
298 46587 alessia.ba
	public void testLinkOrganizationAffiliation() throws Exception {
299
300
		final List<Row> rows = Lists.newArrayList();
301
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organization.xml")));
302
		rows.addAll(asRows(loadFromTransformationProfile("resultorganization_2_hbase.xsl"), load("result_organization.xml")));
303
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml")));
304
305
		printAll(mapAll(buildTable(rows)));
306
	}
307
308
	@Test
309 44483 claudio.at
	public void testDuplicates() throws Exception {
310
		final String mergeId = "50|dedup_wf_001::08ed625d07e5738b794ff14d6773fd9f";
311
		final List<Row> rows = Lists.newArrayList();
312
313 49029 claudio.at
		final Function<Row, Row> f = rowIn -> {
314 44483 claudio.at
315 49029 claudio.at
			final List<Column<String,byte[]>> cols = Lists.newArrayList();
316
			for(Column<String,byte[]> col : rowIn.getColumns()) {
317
				if (col.getName().equals("body")) {
318
					cols.add(new Column(col.getName(), col.getValue()));
319 44483 claudio.at
320
				}
321
			}
322 49029 claudio.at
			return new Row("result", rowIn.getKey(), cols);
323 44483 claudio.at
		};
324
325
		final List<Row> puma1 = asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordPuma1.xml"), f);
326
		puma1.add(new Row("resultResult_dedup_isMergedIn", mergeId));
327
328
		final List<Row> puma2 = asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordPuma2.xml"), f);
329
		puma2.add(new Row("resultResult_dedup_isMergedIn", mergeId));
330
331
		rows.addAll(puma1);
332
		rows.addAll(puma2);
333
334
		List<Oaf> duplicates = Lists.newArrayList();
335
		duplicates.add(getOafBody(puma1));
336
		duplicates.add(getOafBody(puma2));
337
		final Oaf.Builder oafMerge = OafEntityMerger.merge(mergeId, duplicates);
338
339 48697 claudio.at
		final Row mergeRow = new Row("result", mergeId, Lists.newArrayList(new Column("body", oafMerge.build().toByteArray())));
340 44483 claudio.at
341
		rows.add(mergeRow);
342
343
		printAll(mapAll(buildTable(rows)));
344
	}
345
346
	private Oaf getOafBody(final List<Row> rows) throws InvalidProtocolBufferException {
347
		for(Row row : rows) {
348
			if(StringUtils.startsWith(row.getKey(), "50")) {
349
				return Oaf.parseFrom(row.getColumn("body").getValue());
350
351
			}
352
		}
353
		return null;
354
	}
355
356
	@Test
357 35746 alessia.ba
	public void testParseDoajOAF() throws Exception {
358
359 40063 alessia.ba
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("doajUniversityRecord.xml"));
360 35746 alessia.ba
	}
361
362 39888 alessia.ba
	@Test
363
	public void testParseDatasource() throws Exception {
364
365 40205 claudio.at
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("datasourceNative.xml"));
366 39888 alessia.ba
	}
367 44899 alessia.ba
	@Test
368
	public void testParseDatasourcePiwik() throws Exception {
369 39888 alessia.ba
370 44899 alessia.ba
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("datasourcePiwik.xml"));
371
	}
372
373 40205 claudio.at
	@Test
374 40341 alessia.ba
	public void testParseDataDatasource() throws Exception {
375
376
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("dataDatasource.xml"));
377
	}
378
379
	@Test
380 36164 claudio.at
	public void testFromMongodbCompressedDump() throws Exception {
381 40063 alessia.ba
		doTestJsonGz(loadFromTransformationProfile("oaf2hbase.xml"), load("mdstore_cleaned.json.gz"));
382 35746 alessia.ba
	}
383
384 40205 claudio.at
	@Test
385
	public void testLoadFromTransformationProfile() throws IOException {
386
		InputStream in = loadFromTransformationProfile("oaf2hbase.xml");
387
		log.info(IOUtils.toString(in));
388
	}
389
390
	@Test
391
	public void test_template() throws Exception {
392
		final String xslt = IOUtils.toString(loadFromTransformationProfile("oaf2hbase.xml"));
393
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
394
		assertNotNull(transformer);
395
396
		final String record = IOUtils.toString(load("record.xml"));
397
		final List<Row> rows = transformer.apply(record);
398
399
		System.out.println(rows);
400
	}
401
402
	@Test
403
	public void testWrongCharsOrganization() throws Exception {
404
		final List<Row> rows = Lists.newArrayList();
405
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organizationWrongChars.xml")));
406
		printAll(mapAll(buildTable(rows)));
407
	}
408
409 43169 alessia.ba
	@Test
410
	public void testParseProjectWithFunderOriginalName() throws Exception {
411
412
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectWithFunderOriginalName.xml"));
413
	}
414
	@Test
415
	public void testLinkFunderOriginalName() throws Exception {
416
417
		final List<Row> rows = Lists.newArrayList();
418
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectWithFunderOriginalName.xml")));
419
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordFunderOriginalName.xml")));
420
421
		printAll(mapAll(buildTable(rows)));
422
	}
423
424 44070 alessia.ba
	@Test
425
	public void testProjectExtraInfo() throws Exception {
426
		final List<Row> rows = Lists.newArrayList();
427
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordExtraInfo.xml")));
428
		printAll(mapAll(buildTable(rows)));
429
	}
430
431 48697 claudio.at
	@Test
432
	public void testParseSoftwareFromODF() throws Exception {
433
		final List<Row> rows = Lists.newArrayList();
434
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("softwareODF.xml")));
435
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
436
		printAll(mapAll(buildTable(rows)));
437
	}
438
439 52212 alessia.ba
	@Test
440 48697 claudio.at
	public void testParseSoftwareFromOAF() throws Exception {
441
		final List<Row> rows = Lists.newArrayList();
442 52212 alessia.ba
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordOAFsoftwareCLOSED.xml")));
443 48697 claudio.at
		printAll(mapAll(buildTable(rows)));
444
	}
445
446 52212 alessia.ba
	@Test
447
	public void testParseSoftwareFromOAFOpen() throws Exception {
448
		final List<Row> rows = Lists.newArrayList();
449
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordOAFsoftwareOPEN.xml")));
450
		printAll(mapAll(buildTable(rows)));
451
	}
452
453 52275 alessia.ba
	@Test
454 53756 alessia.ba
	public void testParseSoftwareBiotool() throws Exception {
455
		final List<Row> rows = Lists.newArrayList();
456
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("biotoolSw.xml")));
457
		printAll(mapAll(buildTable(rows)));
458
	}
459
460
	@Test
461 52275 alessia.ba
	public void testParseOafWithExternalRef() throws Exception {
462
		final List<Row> rows = Lists.newArrayList();
463
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("oafWithExternalReference.xml")));
464
		printAll(mapAll(buildTable(rows)));
465
	}
466
467 52277 alessia.ba
	@Test
468
	public void testParseOafWithCommunity() throws Exception {
469
		final List<Row> rows = Lists.newArrayList();
470
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("oafWithCommunity.xml")));
471
		printAll(mapAll(buildTable(rows)));
472
	}
473
474 53408 claudio.at
	@Test
475
	public void testParseOafWithUpdates() throws Exception {
476
		final List<Row> rows = Lists.newArrayList();
477
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("oafWithCommunity.xml")));
478
479
		ActionFactory actionFactory = new ActionFactory();
480
481
		Map<String, Resource> xslts = Maps.newHashMap();
482
483
		xslts.put("oaf2hbase", new ByteArrayResource(IOUtils.toString(loadFromTransformationProfile("oaf2hbase.xml")).getBytes()));
484
		actionFactory.setXslts(xslts);
485
486
		XsltInfoPackageAction pa = actionFactory.generateInfoPackageAction(
487
				"oaf2hbase",
488
				"rawset-id",
489
				new Agent("agent-id", "agent-name", Agent.AGENT_TYPE.algo),
490
				Operation.UPDATE,
491
				IOUtils.toString(load("oafUpdateWithCommunity.xml")),
492
				Provenance.sysimport_mining_aggregator,
493
				"0.9");
494
495
		final String qualifier = "update_" + System.nanoTime();
496
497
		IOUtils.readLines(load("country_updates.json")).forEach(line -> {
498
499
			Oaf.Builder oaf = Oaf.newBuilder();
500
501
			try {
502
				JsonFormat.merge(line, oaf);
503
			} catch (JsonFormat.ParseException e) {
504
				throw new IllegalArgumentException(e);
505
			}
506
507
			Column<String, byte[]> col = new Column<>("update_" + System.nanoTime(), oaf.build().toByteArray());
508
			rows.add(new Row("result", oaf.getEntity().getId(), Lists.newArrayList(col)));
509
		});
510
511
		pa.asAtomicActions().forEach(a -> {
512
			Column<String, byte[]> col = new Column<>("update_" + System.nanoTime(), a.getTargetValue());
513
			rows.add(new Row(a.getTargetColumnFamily(), a.getTargetRowKey(), Lists.newArrayList(col)));
514
		});
515
516
517
		/*
518
		rows.forEach(r -> {
519
			log.info(r);
520
		});
521
		*/
522
523
		mapAll(buildTable(rows)).entrySet().forEach(b -> {
524
			log.info(b.getKey());
525
			log.info(b.getValue());
526
		});
527
	}
528
529 31997 claudio.at
	private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception {
530
		try {
531 34438 claudio.at
			final List<Row> rows = asRows(xsltStream, recordStream);
532 31997 claudio.at
533 40063 alessia.ba
			log.info(rows);
534 26600 sandro.lab
535 34438 claudio.at
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTable(rows);
536 26600 sandro.lab
537 30967 claudio.at
			// System.out.println("\n" + table.toString());
538 26600 sandro.lab
539 34438 claudio.at
			final Map<String, XmlRecordFactory> builders = mapAll(table);
540 28094 claudio.at
541 38025 claudio.at
			printAll(builders);
542 34438 claudio.at
		} catch (final InvalidProtocolBufferException e) {
543 31997 claudio.at
			throw new Exception(e);
544 34438 claudio.at
		} catch (final TransformerConfigurationException e) {
545 31997 claudio.at
			throw new Exception(e);
546 34438 claudio.at
		} catch (final TransformerFactoryConfigurationError e) {
547 31997 claudio.at
			throw new Exception(e);
548 34438 claudio.at
		} catch (final DocumentException e) {
549 31997 claudio.at
			throw new Exception(e);
550 26600 sandro.lab
		}
551
	}
552
553 35746 alessia.ba
	private void doTestJsonGz(final InputStream xsltStream, final InputStream recordStream) throws Exception {
554
555 36164 claudio.at
		final Iterator<List<Row>> rowsIterator = asRowsJsonGzip(xsltStream, recordStream);
556 35746 alessia.ba
557 36164 claudio.at
		int i = 0;
558
		while (rowsIterator.hasNext()) {
559
			final List<Row> rows = rowsIterator.next();
560
			i++;
561 35746 alessia.ba
562 36164 claudio.at
			if ((i % 10000) == 0) {
563
				System.out.println(i);
564
			}
565 35746 alessia.ba
566 36164 claudio.at
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTableDoaj(rows);
567 35746 alessia.ba
568 36164 claudio.at
			for (final Map<String, Map<String, byte[]>> m : table.values()) {
569
				for (final Map<String, byte[]> mv : m.values()) {
570
					for (final byte[] v : mv.values()) {
571
						final OafDecoder d = OafDecoder.decode(v);
572
						assertNotNull(d);
573
						assertNotNull(d.getOaf());
574
575
						switch (d.getKind()) {
576
						case entity:
577
							assertNotNull(d.getMetadata());
578
							if (d.getOaf().getEntity().getType().equals(Type.result)) {
579
								System.out.println(d.getOaf());
580
							}
581
							break;
582
						case relation:
583
							assertNotNull(d.getRel());
584
							break;
585
						default:
586
							break;
587
						}
588
					}
589
				}
590 35746 alessia.ba
			}
591
		}
592
	}
593
594 44483 claudio.at
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream, final Function<Row, Row> p) throws Exception {
595 48697 claudio.at
		return asRows(xsltStream, new HashMap<>(), recordStream, p);
596 44483 claudio.at
	}
597
598 31997 claudio.at
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream) throws Exception {
599 48697 claudio.at
		return asRows(xsltStream, new HashMap<>(), recordStream);
600 38025 claudio.at
	}
601
602
	private List<Row> asRows(final InputStream xsltStream, final Map<String, Object> params, final InputStream recordStream) throws Exception {
603 44483 claudio.at
		return asRows(xsltStream, params, recordStream, null);
604
	}
605
606
	private List<Row> asRows(final InputStream xsltStream, final Map<String, Object> params, final InputStream recordStream, final Function<Row, Row> p) throws Exception {
607 34438 claudio.at
		final String xslt = IOUtils.toString(xsltStream);
608 38025 claudio.at
		final XsltRowTransformer transformer = factory.getTransformer(xslt, params);
609 31997 claudio.at
		assertNotNull(transformer);
610 26600 sandro.lab
611 34438 claudio.at
		final String record = IOUtils.toString(recordStream);
612
		final List<Row> rows = transformer.apply(record);
613 31997 claudio.at
614
		assertNotNull(rows);
615
		assertFalse(rows.isEmpty());
616 44483 claudio.at
		return p == null ? rows : Lists.newArrayList(Iterables.transform(rows, p));
617 31997 claudio.at
	}
618
619 35746 alessia.ba
	private Iterator<List<Row>> asRowsJsonGzip(final InputStream xsltStream, final InputStream recordStreamJsonGzip) throws Exception {
620
		final String xslt = IOUtils.toString(xsltStream);
621
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
622
		assertNotNull(transformer);
623
		assertNotNull(recordStreamJsonGzip);
624
625 36164 claudio.at
		final GZIPInputStream stream = new GZIPInputStream(recordStreamJsonGzip);
626 35746 alessia.ba
		assertNotNull(stream);
627
		final BufferedReader inStream = new BufferedReader(new InputStreamReader(stream));
628
		assertNotNull(inStream);
629
		return new Iterator<List<Row>>() {
630
631
			String jsonRecord = null;
632
633
			@Override
634
			public boolean hasNext() {
635
				try {
636
					return (jsonRecord = inStream.readLine()) != null;
637 36164 claudio.at
				} catch (final IOException e) {
638 35746 alessia.ba
					throw new RuntimeException(e);
639
				}
640
			}
641
642
			@Override
643
			public List<Row> next() {
644
645 36164 claudio.at
				final JSONObject jsonObj = new JSONObject(jsonRecord);
646
				final String body = jsonObj.getString("body");
647
				try {
648
					assertTrue(StringUtils.isNotBlank(body));
649
					// System.out.println(body);
650
					final List<Row> rows = transformer.apply(body);
651
					assertNotNull(rows);
652
					assertFalse(rows.isEmpty());
653
					return rows;
654
				} catch (final Throwable e) {
655
					System.err.println("error transforming document: " + body);
656
					throw new RuntimeException(e);
657
				}
658 35746 alessia.ba
			}
659
660
			@Override
661
			public void remove() {
662 43394 claudio.at
				throw new UnsupportedOperationException();
663 35746 alessia.ba
			}
664
665
		};
666
667
	}
668
669
	private Map<String, Map<String, Map<String, byte[]>>> buildTableDoaj(final List<Row> rows) throws UnsupportedEncodingException {
670 34438 claudio.at
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
671 31997 claudio.at
672 34438 claudio.at
		for (final Row row : rows) {
673
			final String rowKey = row.getKey();
674
			final String cf = row.getColumnFamily();
675 31997 claudio.at
			if (!table.containsKey(rowKey)) {
676 48697 claudio.at
				table.put(rowKey, new HashMap<>());
677 31997 claudio.at
			}
678
			if (!table.get(rowKey).containsKey(cf)) {
679 48697 claudio.at
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<>());
680 31997 claudio.at
			}
681 34438 claudio.at
			for (final Column<String, byte[]> c : row.getColumns()) {
682 35746 alessia.ba
				// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
683
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
684
				if (cf.equals("result") && c.getName().equals("body")) {
685 36164 claudio.at
					// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
686 35746 alessia.ba
					assertTrue(StringUtils.isNotBlank(new String(c.getValue(), "UTF-8")));
687
				}
688
			}
689
		}
690
		return table;
691
692
	}
693
694 53588 sandro.lab
	protected Map<String, Map<String, Map<String, byte[]>>> buildTable(final List<Row> rows) throws UnsupportedEncodingException {
695 35746 alessia.ba
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
696
697
		for (final Row row : rows) {
698
			final String rowKey = row.getKey();
699
			final String cf = row.getColumnFamily();
700
			if (!table.containsKey(rowKey)) {
701 49029 claudio.at
				table.put(rowKey, new HashMap<>());
702 35746 alessia.ba
			}
703
			if (!table.get(rowKey).containsKey(cf)) {
704 49029 claudio.at
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<>());
705 35746 alessia.ba
			}
706
			for (final Column<String, byte[]> c : row.getColumns()) {
707 31997 claudio.at
				System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
708
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
709 38586 claudio.at
				if (c.getName().equals("body")) {
710 36164 claudio.at
					final String theBody = new String(c.getValue(), "UTF-8");
711 35746 alessia.ba
					assertTrue(StringUtils.isNotBlank(theBody));
712 44070 alessia.ba
					//System.out.println(theBody);
713 35746 alessia.ba
				}
714 31997 claudio.at
			}
715
		}
716
		return table;
717 35746 alessia.ba
718 31997 claudio.at
	}
719
720 53588 sandro.lab
	protected Map<String, XmlRecordFactory> mapAll(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
721 31997 claudio.at
722 34438 claudio.at
		final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
723
		for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
724 31997 claudio.at
			map(builders, e.getKey(), e.getValue());
725
		}
726
		return builders;
727
	}
728
729 35746 alessia.ba
	// private Map<String, XmlRecordFactory> mapResultsOnly(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
730
	//
731
	// final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
732
	// for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
733
	// final Type type = OafRowKeyDecoder.decode(e.getKey()).getType();
734
	// if (type == Type.result) {
735
	// map(builders, e.getKey(), e.getValue());
736
	// }
737
	// }
738
	// return builders;
739
	// }
740
741 31997 claudio.at
	private void map(final Map<String, XmlRecordFactory> builders, final String rowKey, final Map<String, Map<String, byte[]>> row) throws Exception {
742
743 30967 claudio.at
		final Type type = OafRowKeyDecoder.decode(rowKey).getType();
744 26600 sandro.lab
745 41468 claudio.at
		final Map<String, byte[]> familyMap = row.get(type.toString());
746 26600 sandro.lab
747 41468 claudio.at
		if (familyMap == null) return;
748 26600 sandro.lab
749 41468 claudio.at
		final byte[] bodyB = familyMap.get("body");
750 26600 sandro.lab
751 30967 claudio.at
		if (bodyB != null) {
752
			ensureBuilder(builders, rowKey);
753 41468 claudio.at
754
			final Oaf oaf = UpdateMerger.mergeBodyUpdates(familyMap);
755
756
			final OafDecoder mainEntity = OafDecoder.decode(oaf);
757
758 30967 claudio.at
			builders.get(rowKey).setMainEntity(mainEntity);
759 28094 claudio.at
760 34438 claudio.at
			for (final LinkDescriptor ld : entityConfigTable.getDescriptors(type)) {
761 26600 sandro.lab
762 34438 claudio.at
				final String it = ld.getRelDescriptor().getIt();
763
				final Map<String, byte[]> cols = row.get(it);
764 26600 sandro.lab
765 35746 alessia.ba
				if ((cols != null) && !cols.isEmpty()) {
766 26600 sandro.lab
767 34438 claudio.at
					for (final byte[] oafB : cols.values()) {
768 26600 sandro.lab
769 41468 claudio.at
						final Oaf.Builder relBuilder = Oaf.newBuilder(Oaf.parseFrom(oafB));
770 26600 sandro.lab
771 30967 claudio.at
						if (ld.isSymmetric()) {
772 34438 claudio.at
							final RelDescriptor rd = ld.getRelDescriptor();
773 40205 claudio.at
774
							relBuilder.getRelBuilder().setCachedTarget(mainEntity.getEntity()).setRelType(rd.getRelType()).setSubRelType(rd.getSubRelType());
775 30967 claudio.at
						}
776 26600 sandro.lab
777 40205 claudio.at
						relBuilder.getRelBuilder().setChild(ld.isChild());
778 26600 sandro.lab
779 40314 claudio.at
						final Oaf.Builder oafBuilder = Oaf.newBuilder().setKind(Kind.relation).setLastupdatetimestamp(System.currentTimeMillis());
780 40205 claudio.at
						oafBuilder.mergeFrom(relBuilder.build());
781 26600 sandro.lab
782 40205 claudio.at
						final String targetId = ld.isSymmetric() ? oafBuilder.getRel().getTarget() : oafBuilder.getRel().getSource();
783 30967 claudio.at
						ensureBuilder(builders, targetId);
784 34438 claudio.at
						final OafDecoder decoder = OafDecoder.decode(oafBuilder.build());
785 37334 claudio.at
786 30967 claudio.at
						if (ld.isChild()) {
787 37334 claudio.at
							builders.get(targetId).addChild(type, decoder);
788 30967 claudio.at
						} else {
789 37334 claudio.at
							builders.get(targetId).addRelation(type, decoder);
790 30967 claudio.at
						}
791 37334 claudio.at
					}
792 26600 sandro.lab
793 30967 claudio.at
				}
794
			}
795 26600 sandro.lab
		}
796
797
	}
798
799 38025 claudio.at
	private void printAll(final Map<String, XmlRecordFactory> builders) throws DocumentException {
800
		print(Sets.newHashSet(Type.values()), builders, null);
801
	}
802
803
	private void print(final Set<Type> types, final Map<String, XmlRecordFactory> builders, final Map<Type, Set<String>> xpaths) throws DocumentException {
804
		final SAXReader r = new SAXReader();
805
806 34438 claudio.at
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
807 38025 claudio.at
			final OafRowKeyDecoder kd = OafRowKeyDecoder.decode(e.getKey());
808
809
			if (!e.getValue().isValid()) throw new IllegalArgumentException("invalid builder: " + e.getKey());
810
			if (types.contains(kd.getType())) {
811
				final String val = IndentXmlString.apply(e.getValue().build());
812
813
				if ((xpaths != null) && !xpaths.isEmpty() && (xpaths.get(kd.getType()) != null)) {
814
					final Document doc = r.read(new StringReader(val));
815
816 40063 alessia.ba
					log.debug("\n" + e.getKey());
817 38025 claudio.at
					for (final String xpath : xpaths.get(kd.getType())) {
818 40063 alessia.ba
						log.debug(doc.valueOf(xpath));
819 38025 claudio.at
					}
820
				} else {
821 40063 alessia.ba
					log.info(val);
822 38025 claudio.at
				}
823 31997 claudio.at
			}
824
		}
825
	}
826
827 35746 alessia.ba
	private void printNoIndent(final Map<String, XmlRecordFactory> builders) {
828
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
829
			if (e.getValue().isValid()) {
830 40063 alessia.ba
				log.debug(e.getValue().build());
831 35746 alessia.ba
			} else {
832 40063 alessia.ba
				log.debug("invalid builder: " + e.getKey());
833 35746 alessia.ba
			}
834
		}
835
	}
836
837 31997 claudio.at
	private void ensureBuilder(final Map<String, XmlRecordFactory> builders, final String rowKey) throws Exception {
838 30967 claudio.at
		if (!builders.containsKey(rowKey)) {
839
			builders.put(rowKey, newBuilder());
840 26600 sandro.lab
		}
841
	}
842
843 30967 claudio.at
	private XmlRecordFactory newBuilder() throws TransformerConfigurationException, TransformerFactoryConfigurationError, DocumentException {
844 46587 alessia.ba
		return new XmlRecordFactory(entityConfigTable, ContextMapper.fromXml(Context.xml),
845
				RelClasses.fromJSon(RelClassesTest.relClassesJson), XmlRecordFactoryTest.SCHEMA_LOCATION, true, false, false, XmlRecordFactoryTest.specialDatasourceTypes);
846 30967 claudio.at
	}
847
848 33382 claudio.at
	private InputStream load(final String fileName) {
849
		return getClass().getResourceAsStream(fileName);
850
	}
851
852 40063 alessia.ba
	private InputStream loadFromTransformationProfile(final String profilePath) {
853
		log.info("Loading xslt from: " + basePathProfiles + profilePath);
854
		InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath);
855
		SAXReader saxReader = new SAXReader();
856
		Document doc = null;
857
		try {
858
			doc = saxReader.read(profile);
859
		} catch (DocumentException e) {
860
			e.printStackTrace();
861
			throw new RuntimeException(e);
862
		}
863
		String xslt = doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']").asXML();
864 52422 claudio.at
		//log.info(xslt);
865 40063 alessia.ba
		return IOUtils.toInputStream(xslt);
866
	}
867
868 26600 sandro.lab
}