Project

General

Profile

1
package eu.dnetlib.data.transform;
2

    
3
import java.io.*;
4
import java.util.*;
5
import java.util.Map.Entry;
6
import java.util.zip.GZIPInputStream;
7
import javax.xml.transform.TransformerConfigurationException;
8
import javax.xml.transform.TransformerFactoryConfigurationError;
9

    
10
import com.google.common.base.Function;
11
import com.google.common.collect.Iterables;
12
import com.google.common.collect.Lists;
13
import com.google.common.collect.Maps;
14
import com.google.common.collect.Sets;
15
import com.google.protobuf.InvalidProtocolBufferException;
16
import com.googlecode.protobuf.format.JsonFormat;
17
import com.googlecode.protobuf.format.JsonFormat.ParseException;
18
import eu.dnetlib.actionmanager.actions.ActionFactory;
19
import eu.dnetlib.actionmanager.actions.XsltInfoPackageAction;
20
import eu.dnetlib.actionmanager.common.Agent;
21
import eu.dnetlib.actionmanager.common.Operation;
22
import eu.dnetlib.actionmanager.common.Provenance;
23
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
24
import eu.dnetlib.data.mapreduce.util.*;
25
import eu.dnetlib.data.proto.KindProtos.Kind;
26
import eu.dnetlib.data.proto.OafProtos.Oaf;
27
import eu.dnetlib.data.proto.TypeProtos.Type;
28
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
29
import org.apache.commons.io.IOUtils;
30
import org.apache.commons.lang.StringUtils;
31
import org.apache.commons.logging.Log;
32
import org.apache.commons.logging.LogFactory;
33
import org.dom4j.Document;
34
import org.dom4j.DocumentException;
35
import org.dom4j.io.SAXReader;
36
import org.json.JSONObject;
37
import org.junit.Before;
38
import org.junit.Ignore;
39
import org.junit.Test;
40
import org.springframework.core.io.ByteArrayResource;
41
import org.springframework.core.io.Resource;
42

    
43
import static org.junit.Assert.*;
44

    
45
public class XsltRowTransformerFactoryTest {
46

    
47
	private static final Log log = LogFactory.getLog(XsltRowTransformerFactoryTest.class);
48
	private static String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/2hbase/";
49
	private XsltRowTransformerFactory factory;
50
	private EntityConfigTable entityConfigTable;
51

    
52
	@Before
53
	public void setUp() throws Exception {
54
		factory = new XsltRowTransformerFactory();
55
		entityConfigTable = IndexConfig.load(IndexConfigTest.config).getConfigMap();
56
	}
57

    
58
	@Test
59
	@Ignore // need to reimplement because claimUpdates_2_hbase.xsl was removed
60
	public void testParseOafClaimUpdate() throws Exception {
61
		doTest(loadFromTransformationProfile("claimUpdates_2_hbase.xsl"), load("recordClaimUpdate.xml"));
62
	}
63

    
64
	@Test
65
	@Ignore // need to reimplement because claimUpdates_2_hbase.xsl was removed
66
	public void testParseClaimUpdate() throws Exception {
67

    
68
		final List<Row> rows = Lists.newArrayList();
69
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaim.xml")));
70
		rows.addAll(asRows(loadFromTransformationProfile("claimUpdates_2_hbase.xsl"), load("recordClaimUpdate.xml")));
71

    
72
		printAll(mapAll(buildTable(rows)));
73
	}
74

    
75
	@Test
76
	public void testParseClaimRel() throws Exception {
77

    
78
		doTest(loadFromTransformationProfile("claimRels_2_hbase.xml"), load("recordClaimRel.xml"));
79
	}
80

    
81

    
82
	@Test
83
	public void testParseFp7IctPUB() throws Exception {
84

    
85
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("ec_fp7_ict.xml"));
86
	}
87

    
88
	@Test
89
	public void testParseRecordCrossref() throws Exception {
90

    
91
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordCrossref.xml"));
92
	}
93

    
94
	@Test
95
	public void testParseDatasetPUB() throws Exception {
96

    
97
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatasetPUB.xml"));
98
	}
99

    
100
	@Test
101
	public void testParseSoftwareEgiApp() throws Exception {
102

    
103
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("softwareEgiApp.xml"));
104
	}
105

    
106
	@Test
107
	public void testParseSoftwareEgiApp2() throws Exception {
108

    
109
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("softwareEgiApp2.xml"));
110
	}
111

    
112
	@Test
113
	public void testParseOrpEgiApp() throws Exception {
114

    
115
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("orpEgiApp.xml"));
116
	}
117

    
118
	@Test
119
	public void testParseSoftwareDOECODE() throws Exception {
120

    
121
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("doecode.xml"));
122
	}
123

    
124

    
125
	@Test
126
	public void testParseDatasetLindat() throws Exception {
127

    
128
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("datasetLindat.xml"));
129
	}
130

    
131
	@Test
132
	public void testParseDatasetNeuroVault() throws Exception {
133

    
134
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordNeuroVault.xml"));
135
	}
136

    
137
	@Test
138
	public void testParseDatasetNeuroVault2() throws Exception {
139

    
140
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordNeuroVault2.xml"));
141
	}
142

    
143
	@Test
144
	public void testParseClaim() throws Exception {
145

    
146
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaim.xml"));
147
	}
148

    
149
	@Test
150
	public void testParseClaimDedup() throws Exception {
151

    
152
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaimedDedup.xml"));
153
	}
154

    
155

    
156
	@Test
157
	public void testParseClaimDataset() throws Exception {
158

    
159
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordClaimDataset.xml"));
160
	}
161

    
162

    
163
	@Test
164
	public void testParseACM() throws Exception {
165

    
166
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordACM.xml"));
167
	}
168

    
169
	@Test
170
	public void testParseASB() throws Exception {
171

    
172
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordASB.xml"));
173
	}
174

    
175
	@Test
176
	public void testParseProjectCorda() throws Exception {
177

    
178
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml"));
179
	}
180

    
181
	@Test
182
	public void testParseProjectFCT() throws Exception {
183

    
184
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordFCT.xml"));
185
	}
186

    
187

    
188
	@Test
189
	public void testParseOaf() throws Exception {
190

    
191
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml"));
192
	}
193

    
194
	@Test
195
	public void testParseOafPublication() throws Exception {
196

    
197
		doTest(loadFromTransformationProfile("oaf_entity2hbase.xml"), load("record.xml"));
198
	}
199

    
200
	@Test
201
	public void testParseLindat() throws Exception {
202

    
203
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordLindat.xml"));
204
	}
205

    
206
	@Test
207
	public void testParseDatacite() throws Exception {
208

    
209
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite.xml"));
210
	}
211

    
212
	@Test
213
	public void testParseDatacite2() throws Exception {
214

    
215
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite2.xml"));
216
	}
217

    
218
	@Test
219
	public void testParseDataciteNewES() throws Exception {
220

    
221
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("dataciteNew.xml"));
222
	}
223

    
224
	@Test
225
	public void testParseOpenTrials() throws Exception {
226

    
227
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("opentrials_datacite1.xml"));
228
	}
229

    
230
	@Test
231
	public void testLinkPangaea() throws Exception {
232

    
233
		final List<Row> rows = Lists.newArrayList();
234
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("pangaeODF1.xml")));
235
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("pangaeODF2.xml")));
236
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("pangaeOAF.xml")));
237
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCordaPangaea.xml")));
238

    
239
		printAll(mapAll(buildTable(rows)));
240
	}
241

    
242
	@Test
243
	public void testPangaea() throws Exception {
244

    
245
		final List<Row> rows = Lists.newArrayList();
246
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("pangaeOAF2.xml")));
247
		printAll(mapAll(buildTable(rows)));
248
	}
249
	@Test
250
	public void testZenodo() throws Exception {
251

    
252
		final List<Row> rows = Lists.newArrayList();
253
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("zenodoData.xml")));
254
		printAll(mapAll(buildTable(rows)));
255
	}
256

    
257
	@Test
258
	public void testZenodoSoftware() throws Exception {
259

    
260
		final List<Row> rows = Lists.newArrayList();
261
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("softwareZenodo_odf.xml")));
262
		printAll(mapAll(buildTable(rows)));
263
	}
264

    
265
	@Test
266
	public void testLinkCorda() throws Exception {
267

    
268
		final List<Row> rows = Lists.newArrayList();
269
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
270
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordCorda.xml")));
271

    
272
		printAll(mapAll(buildTable(rows)));
273
	}
274

    
275
	@Test
276
	public void testLinkFCT() throws Exception {
277

    
278
		final List<Row> rows = Lists.newArrayList();
279
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordFCT.xml")));
280
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordFCT.xml")));
281

    
282
		printAll(mapAll(buildTable(rows)));
283
	}
284

    
285
	@Test
286
	public void testLinkARC() throws Exception {
287

    
288
		final List<Row> rows = Lists.newArrayList();
289
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordARC.xml")));
290
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordARC.xml")));
291

    
292
		printAll(mapAll(buildTable(rows)));
293
	}
294

    
295
	@Test
296
	public void testLinkWT() throws Exception {
297

    
298
		final List<Row> rows = Lists.newArrayList();
299
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordWT.xml")));
300
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordWT.xml")));
301

    
302
		printAll(mapAll(buildTable(rows)));
303
	}
304

    
305

    
306

    
307
	@Test
308
	public void testLinkOrganization() throws Exception {
309

    
310
		final List<Row> rows = Lists.newArrayList();
311
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organization.xml")));
312
		rows.addAll(asRows(loadFromTransformationProfile("projectorganization_2_hbase.xsl"), load("project_organization.xml")));
313
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
314

    
315
		printAll(mapAll(buildTable(rows)));
316
	}
317

    
318
	@Test
319
	public void testLinkOrganizationAffiliation() throws Exception {
320

    
321
		final List<Row> rows = Lists.newArrayList();
322
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organization.xml")));
323
		rows.addAll(asRows(loadFromTransformationProfile("resultorganization_2_hbase.xsl"), load("result_organization.xml")));
324
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml")));
325

    
326
		printAll(mapAll(buildTable(rows)));
327
	}
328

    
329
	@Test
330
	public void testDuplicates() throws Exception {
331
		final String mergeId = "50|dedup_wf_001::08ed625d07e5738b794ff14d6773fd9f";
332
		final List<Row> rows = Lists.newArrayList();
333

    
334
		final Function<Row, Row> f = rowIn -> {
335

    
336
			final List<Column<String,byte[]>> cols = Lists.newArrayList();
337
			for(Column<String,byte[]> col : rowIn.getColumns()) {
338
				if (col.getName().equals("body")) {
339
					cols.add(new Column(col.getName(), col.getValue()));
340

    
341
				}
342
			}
343
			return new Row("result", rowIn.getKey(), cols);
344
		};
345

    
346
		final List<Row> puma1 = asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordPuma1.xml"), f);
347
		puma1.add(new Row("resultResult_dedup_isMergedIn", mergeId));
348

    
349
		final List<Row> puma2 = asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordPuma2.xml"), f);
350
		puma2.add(new Row("resultResult_dedup_isMergedIn", mergeId));
351

    
352
		rows.addAll(puma1);
353
		rows.addAll(puma2);
354

    
355
		List<Oaf> duplicates = Lists.newArrayList();
356
		duplicates.add(getOafBody(puma1));
357
		duplicates.add(getOafBody(puma2));
358
		final Oaf.Builder oafMerge = OafEntityMerger.merge(mergeId, duplicates);
359

    
360
		final Row mergeRow = new Row("result", mergeId, Lists.newArrayList(new Column("body", oafMerge.build().toByteArray())));
361

    
362
		rows.add(mergeRow);
363

    
364
		printAll(mapAll(buildTable(rows)));
365
	}
366

    
367
	private Oaf getOafBody(final List<Row> rows) throws InvalidProtocolBufferException {
368
		for(Row row : rows) {
369
			if(StringUtils.startsWith(row.getKey(), "50")) {
370
				return Oaf.parseFrom(row.getColumn("body").getValue());
371

    
372
			}
373
		}
374
		return null;
375
	}
376

    
377
	@Test
378
	public void testParseDoajOAF() throws Exception {
379

    
380
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("doajUniversityRecord.xml"));
381
	}
382

    
383
	@Test
384
	public void testParseDatasource() throws Exception {
385

    
386
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("datasourceNative.xml"));
387
	}
388
	@Test
389
	public void testParseDatasourcePiwik() throws Exception {
390

    
391
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("datasourcePiwik.xml"));
392
	}
393

    
394
	@Test
395
	public void testParseDataDatasource() throws Exception {
396

    
397
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("dataDatasource.xml"));
398
	}
399

    
400
	@Test
401
	public void testFromMongodbCompressedDump() throws Exception {
402
		doTestJsonGz(loadFromTransformationProfile("oaf2hbase.xml"), load("mdstore_cleaned.json.gz"));
403
	}
404

    
405

    
406
	@Test
407
	public void testLoadFromTransformationProfile() throws IOException {
408
		InputStream in = loadFromTransformationProfile("oaf2hbase.xml");
409
		log.info(IOUtils.toString(in));
410
	}
411

    
412
	@Test
413
	public void test_template() throws Exception {
414
		final String xslt = IOUtils.toString(loadFromTransformationProfile("oaf2hbase.xml"));
415
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
416
		assertNotNull(transformer);
417

    
418
		final String record = IOUtils.toString(load("record.xml"));
419
		final List<Row> rows = transformer.apply(record);
420

    
421
		System.out.println(rows);
422
	}
423

    
424
	@Test
425
	public void testWrongCharsOrganization() throws Exception {
426
		final List<Row> rows = Lists.newArrayList();
427
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organizationWrongChars.xml")));
428
		printAll(mapAll(buildTable(rows)));
429
	}
430

    
431
	@Test
432
	public void testParseProjectWithFunderOriginalName() throws Exception {
433

    
434
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectWithFunderOriginalName.xml"));
435
	}
436
	@Test
437
	public void testLinkFunderOriginalName() throws Exception {
438

    
439
		final List<Row> rows = Lists.newArrayList();
440
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectWithFunderOriginalName.xml")));
441
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordFunderOriginalName.xml")));
442

    
443
		printAll(mapAll(buildTable(rows)));
444
	}
445

    
446
	@Test
447
	public void testProjectExtraInfo() throws Exception {
448
		final List<Row> rows = Lists.newArrayList();
449
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordExtraInfo.xml")));
450
		printAll(mapAll(buildTable(rows)));
451
	}
452

    
453
	@Test
454
	public void testParseSoftwareFromODF() throws Exception {
455
		final List<Row> rows = Lists.newArrayList();
456
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("softwareODF.xml")));
457
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
458
		printAll(mapAll(buildTable(rows)));
459
	}
460

    
461
	@Test
462
	public void testParseSoftwareFromOAF() throws Exception {
463
		final List<Row> rows = Lists.newArrayList();
464
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordOAFsoftwareCLOSED.xml")));
465
		printAll(mapAll(buildTable(rows)));
466
	}
467

    
468
	@Test
469
	public void testParsePubFromODF() throws Exception {
470
		final List<Row> rows = Lists.newArrayList();
471
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("recordODFPub.xml")));
472
		printAll(mapAll(buildTable(rows)));
473
	}
474

    
475
	@Test
476
	public void testParseSoftwareFromOAFOpen() throws Exception {
477
		final List<Row> rows = Lists.newArrayList();
478
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordOAFsoftwareOPEN.xml")));
479
		printAll(mapAll(buildTable(rows)));
480
	}
481

    
482
	@Test
483
	public void testParseSoftwareBiotool() throws Exception {
484
		final List<Row> rows = Lists.newArrayList();
485
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("biotoolSw.xml")));
486
		printAll(mapAll(buildTable(rows)));
487
	}
488

    
489
	@Test
490
	public void testParseOafWithExternalRef() throws Exception {
491
		final List<Row> rows = Lists.newArrayList();
492
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("oafWithExternalReference.xml")));
493
		printAll(mapAll(buildTable(rows)));
494
	}
495

    
496
	@Test
497
	public void testParseOafWithCommunity() throws Exception {
498
		final List<Row> rows = Lists.newArrayList();
499
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("oafWithCommunity.xml")));
500
		printAll(mapAll(buildTable(rows)));
501
	}
502

    
503
	@Test
504
	public void testParseOafWithUpdates() throws Exception {
505
		final List<Row> rows = Lists.newArrayList();
506
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("oafWithCommunity.xml")));
507

    
508
		ActionFactory actionFactory = new ActionFactory();
509

    
510
		Map<String, Resource> xslts = Maps.newHashMap();
511

    
512
		xslts.put("oaf2hbase", new ByteArrayResource(IOUtils.toString(loadFromTransformationProfile("oaf2hbase.xml")).getBytes()));
513
		actionFactory.setXslts(xslts);
514

    
515
		XsltInfoPackageAction pa = actionFactory.generateInfoPackageAction(
516
				"oaf2hbase",
517
				"rawset-id",
518
				new Agent("agent-id", "agent-name", Agent.AGENT_TYPE.algo),
519
				Operation.UPDATE,
520
				IOUtils.toString(load("oafUpdateWithCommunity.xml")),
521
				Provenance.sysimport_mining_aggregator,
522
				"0.9");
523

    
524
		final String qualifier = "update_" + System.nanoTime();
525

    
526
		IOUtils.readLines(load("country_updates.json")).forEach(line -> {
527

    
528
			Oaf.Builder oaf = Oaf.newBuilder();
529

    
530
			try {
531
				JsonFormat.merge(line, oaf);
532
			} catch (JsonFormat.ParseException e) {
533
				throw new IllegalArgumentException(e);
534
			}
535

    
536
			Column<String, byte[]> col = new Column<>("update_" + System.nanoTime(), oaf.build().toByteArray());
537
			rows.add(new Row("result", oaf.getEntity().getId(), Lists.newArrayList(col)));
538
		});
539

    
540
		pa.asAtomicActions().forEach(a -> {
541
			Column<String, byte[]> col = new Column<>("update_" + System.nanoTime(), a.getTargetValue());
542
			rows.add(new Row(a.getTargetColumnFamily(), a.getTargetRowKey(), Lists.newArrayList(col)));
543
		});
544

    
545

    
546
		/*
547
		rows.forEach(r -> {
548
			log.info(r);
549
		});
550
		*/
551

    
552
		mapAll(buildTable(rows)).entrySet().forEach(b -> {
553
			log.info(b.getKey());
554
			log.info(b.getValue());
555
		});
556
	}
557

    
558
	@Test
559
	public void testParseCrisPub() throws Exception {
560
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("cris_pub1.xml"));
561
	}
562

    
563
	@Test
564
	public void testBioToolSwRowJson() throws Exception {
565
		doTestJsonRow(IOUtils.toString(load("biotoolSwRow.json")));
566
	}
567

    
568
	@Test
569
	public void testParseVirta() throws Exception {
570
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("virta.xml"));
571
	}
572

    
573
	@Test
574
	public void testParseJournal() throws Exception {
575
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("datasourceWithISSN.xml"));
576
	}
577

    
578
	@Test
579
	public void testGuidelines4Qeios() throws Exception {
580

    
581
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("guidelines4_qeios1.xml"));
582
	}
583

    
584
    @Test
585
    public void testGuidelines4Aria() throws Exception {
586

    
587
        doTest(loadFromTransformationProfile("odf2hbase.xml"), load("guidelines4_aria.xml"));
588
    }
589

    
590
	private void doTestJsonRow(final String json) throws Exception {
591
		Row row = asRowFromJson(json);
592
		log.info(row);
593
		List<Row> rows = new ArrayList<>();
594
		rows.add(row);
595
		final Map<String, Map<String, Map<String, byte[]>>> table = buildTable(rows);
596
		final Map<String, XmlRecordFactory> builders = mapAll(table);
597
		printAll(builders);
598

    
599
	}
600

    
601
	private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception {
602
		try {
603
			final List<Row> rows = asRows(xsltStream, recordStream);
604

    
605
			log.info(rows);
606

    
607
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTable(rows);
608

    
609
			// System.out.println("\n" + table.toString());
610

    
611
			final Map<String, XmlRecordFactory> builders = mapAll(table);
612

    
613
			printAll(builders);
614
		} catch (final InvalidProtocolBufferException e) {
615
			throw new Exception(e);
616
		} catch (final TransformerConfigurationException e) {
617
			throw new Exception(e);
618
		} catch (final TransformerFactoryConfigurationError e) {
619
			throw new Exception(e);
620
		} catch (final DocumentException e) {
621
			throw new Exception(e);
622
		}
623
	}
624

    
625
	private void doTestJsonGz(final InputStream xsltStream, final InputStream recordStream) throws Exception {
626

    
627
		final Iterator<List<Row>> rowsIterator = asRowsJsonGzip(xsltStream, recordStream);
628

    
629
		int i = 0;
630
		while (rowsIterator.hasNext()) {
631
			final List<Row> rows = rowsIterator.next();
632
			i++;
633

    
634
			if ((i % 10000) == 0) {
635
				System.out.println(i);
636
			}
637

    
638
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTableDoaj(rows);
639

    
640
			for (final Map<String, Map<String, byte[]>> m : table.values()) {
641
				for (final Map<String, byte[]> mv : m.values()) {
642
					for (final byte[] v : mv.values()) {
643
						final OafDecoder d = OafDecoder.decode(v);
644
						assertNotNull(d);
645
						assertNotNull(d.getOaf());
646

    
647
						switch (d.getKind()) {
648
						case entity:
649
							assertNotNull(d.getMetadata());
650
							if (d.getOaf().getEntity().getType().equals(Type.result)) {
651
								System.out.println(d.getOaf());
652
							}
653
							break;
654
						case relation:
655
							assertNotNull(d.getRel());
656
							break;
657
						default:
658
							break;
659
						}
660
					}
661
				}
662
			}
663
		}
664
	}
665

    
666
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream, final Function<Row, Row> p) throws Exception {
667
		return asRows(xsltStream, new HashMap<>(), recordStream, p);
668
	}
669

    
670
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream) throws Exception {
671
		return asRows(xsltStream, new HashMap<>(), recordStream);
672
	}
673

    
674
	private List<Row> asRows(final InputStream xsltStream, final Map<String, Object> params, final InputStream recordStream) throws Exception {
675
		return asRows(xsltStream, params, recordStream, null);
676
	}
677

    
678
	private List<Row> asRows(final InputStream xsltStream, final Map<String, Object> params, final InputStream recordStream, final Function<Row, Row> p) throws Exception {
679
		final String xslt = IOUtils.toString(xsltStream);
680
		final XsltRowTransformer transformer = factory.getTransformer(xslt, params);
681
		assertNotNull(transformer);
682

    
683
		final String record = IOUtils.toString(recordStream);
684
		final List<Row> rows = transformer.apply(record);
685

    
686
		assertNotNull(rows);
687
		assertFalse(rows.isEmpty());
688
		return p == null ? rows : Lists.newArrayList(Iterables.transform(rows, p));
689
	}
690

    
691
	private Iterator<List<Row>> asRowsJsonGzip(final InputStream xsltStream, final InputStream recordStreamJsonGzip) throws Exception {
692
		final String xslt = IOUtils.toString(xsltStream);
693
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
694
		assertNotNull(transformer);
695
		assertNotNull(recordStreamJsonGzip);
696

    
697
		final GZIPInputStream stream = new GZIPInputStream(recordStreamJsonGzip);
698
		assertNotNull(stream);
699
		final BufferedReader inStream = new BufferedReader(new InputStreamReader(stream));
700
		assertNotNull(inStream);
701
		return new Iterator<List<Row>>() {
702

    
703
			String jsonRecord = null;
704

    
705
			@Override
706
			public boolean hasNext() {
707
				try {
708
					return (jsonRecord = inStream.readLine()) != null;
709
				} catch (final IOException e) {
710
					throw new RuntimeException(e);
711
				}
712
			}
713

    
714
			@Override
715
			public List<Row> next() {
716

    
717
				final JSONObject jsonObj = new JSONObject(jsonRecord);
718
				final String body = jsonObj.getString("body");
719
				try {
720
					assertTrue(StringUtils.isNotBlank(body));
721
					// System.out.println(body);
722
					final List<Row> rows = transformer.apply(body);
723
					assertNotNull(rows);
724
					assertFalse(rows.isEmpty());
725
					return rows;
726
				} catch (final Throwable e) {
727
					System.err.println("error transforming document: " + body);
728
					throw new RuntimeException(e);
729
				}
730
			}
731

    
732
			@Override
733
			public void remove() {
734
				throw new UnsupportedOperationException();
735
			}
736

    
737
		};
738

    
739
	}
740

    
741
	private Map<String, Map<String, Map<String, byte[]>>> buildTableDoaj(final List<Row> rows) throws UnsupportedEncodingException {
742
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
743

    
744
		for (final Row row : rows) {
745
			final String rowKey = row.getKey();
746
			final String cf = row.getColumnFamily();
747
			if (!table.containsKey(rowKey)) {
748
				table.put(rowKey, new HashMap<>());
749
			}
750
			if (!table.get(rowKey).containsKey(cf)) {
751
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<>());
752
			}
753
			for (final Column<String, byte[]> c : row.getColumns()) {
754
				// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
755
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
756
				if (cf.equals("result") && c.getName().equals("body")) {
757
					// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
758
					assertTrue(StringUtils.isNotBlank(new String(c.getValue(), "UTF-8")));
759
				}
760
			}
761
		}
762
		return table;
763

    
764
	}
765

    
766
	protected Map<String, Map<String, Map<String, byte[]>>> buildTable(final List<Row> rows) throws UnsupportedEncodingException {
767
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
768

    
769
		for (final Row row : rows) {
770
			final String rowKey = row.getKey();
771
			final String cf = row.getColumnFamily();
772
			if (!table.containsKey(rowKey)) {
773
				table.put(rowKey, new HashMap<>());
774
			}
775
			if (!table.get(rowKey).containsKey(cf)) {
776
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<>());
777
			}
778
			for (final Column<String, byte[]> c : row.getColumns()) {
779
				System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
780
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
781
				if (c.getName().equals("body")) {
782
					final String theBody = new String(c.getValue(), "UTF-8");
783
					assertTrue(StringUtils.isNotBlank(theBody));
784
					//System.out.println(theBody);
785
				}
786
			}
787
		}
788
		return table;
789

    
790
	}
791

    
792
	protected Map<String, XmlRecordFactory> mapAll(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
793

    
794
		final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
795
		for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
796
			map(builders, e.getKey(), e.getValue());
797
		}
798
		return builders;
799
	}
800

    
801
	// private Map<String, XmlRecordFactory> mapResultsOnly(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
802
	//
803
	// final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
804
	// for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
805
	// final Type type = OafRowKeyDecoder.decode(e.getKey()).getType();
806
	// if (type == Type.result) {
807
	// map(builders, e.getKey(), e.getValue());
808
	// }
809
	// }
810
	// return builders;
811
	// }
812

    
813
	private void map(final Map<String, XmlRecordFactory> builders, final String rowKey, final Map<String, Map<String, byte[]>> row) throws Exception {
814

    
815
		final Type type = OafRowKeyDecoder.decode(rowKey).getType();
816

    
817
		final Map<String, byte[]> familyMap = row.get(type.toString());
818

    
819
		if (familyMap == null) return;
820

    
821
		final byte[] bodyB = familyMap.get("body");
822

    
823
		if (bodyB != null) {
824
			ensureBuilder(builders, rowKey);
825

    
826
			final Oaf oaf = UpdateMerger.mergeBodyUpdates(familyMap);
827

    
828
			final OafDecoder mainEntity = OafDecoder.decode(oaf);
829

    
830
			builders.get(rowKey).setMainEntity(mainEntity);
831

    
832
			for (final LinkDescriptor ld : entityConfigTable.getDescriptors(type)) {
833

    
834
				final String it = ld.getRelDescriptor().getIt();
835
				final Map<String, byte[]> cols = row.get(it);
836

    
837
				if ((cols != null) && !cols.isEmpty()) {
838

    
839
					for (final byte[] oafB : cols.values()) {
840

    
841
						final Oaf.Builder relBuilder = Oaf.newBuilder(Oaf.parseFrom(oafB));
842

    
843
						if (ld.isSymmetric()) {
844
							final RelDescriptor rd = ld.getRelDescriptor();
845

    
846
							relBuilder.getRelBuilder().setCachedTarget(mainEntity.getEntity()).setRelType(rd.getRelType()).setSubRelType(rd.getSubRelType());
847
						}
848

    
849
						relBuilder.getRelBuilder().setChild(ld.isChild());
850

    
851
						final Oaf.Builder oafBuilder = Oaf.newBuilder().setKind(Kind.relation).setLastupdatetimestamp(System.currentTimeMillis());
852
						oafBuilder.mergeFrom(relBuilder.build());
853

    
854
						final String targetId = ld.isSymmetric() ? oafBuilder.getRel().getTarget() : oafBuilder.getRel().getSource();
855
						ensureBuilder(builders, targetId);
856
						final OafDecoder decoder = OafDecoder.decode(oafBuilder.build());
857

    
858
						if (ld.isChild()) {
859
							builders.get(targetId).addChild(type, decoder);
860
						} else {
861
							builders.get(targetId).addRelation(type, decoder);
862
						}
863
					}
864

    
865
				}
866
			}
867
		}
868

    
869
	}
870

    
871
	private void printAll(final Map<String, XmlRecordFactory> builders) throws DocumentException {
872
		print(Sets.newHashSet(Type.values()), builders, null);
873
	}
874

    
875
	private void print(final Set<Type> types, final Map<String, XmlRecordFactory> builders, final Map<Type, Set<String>> xpaths) throws DocumentException {
876
		final SAXReader r = new SAXReader();
877

    
878
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
879
			final OafRowKeyDecoder kd = OafRowKeyDecoder.decode(e.getKey());
880

    
881
			if (!e.getValue().isValid()) throw new IllegalArgumentException("invalid builder: " + e.getKey());
882
			if (types.contains(kd.getType())) {
883
				final String val = IndentXmlString.apply(e.getValue().build());
884

    
885
				if ((xpaths != null) && !xpaths.isEmpty() && (xpaths.get(kd.getType()) != null)) {
886
					final Document doc = r.read(new StringReader(val));
887

    
888
					log.debug("\n" + e.getKey());
889
					for (final String xpath : xpaths.get(kd.getType())) {
890
						log.debug(doc.valueOf(xpath));
891
					}
892
				} else {
893
					log.info(val);
894
				}
895
			}
896
		}
897
	}
898

    
899
	private void printNoIndent(final Map<String, XmlRecordFactory> builders) {
900
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
901
			if (e.getValue().isValid()) {
902
				log.debug(e.getValue().build());
903
			} else {
904
				log.debug("invalid builder: " + e.getKey());
905
			}
906
		}
907
	}
908

    
909
	private void ensureBuilder(final Map<String, XmlRecordFactory> builders, final String rowKey) throws Exception {
910
		if (!builders.containsKey(rowKey)) {
911
			builders.put(rowKey, newBuilder());
912
		}
913
	}
914

    
915
	private XmlRecordFactory newBuilder() throws TransformerConfigurationException, TransformerFactoryConfigurationError, DocumentException {
916
		return new XmlRecordFactory(entityConfigTable, ContextMapper.fromXml(Context.xml),
917
				RelClasses.fromJSon(RelClassesTest.relClassesJson), XmlRecordFactoryTest.SCHEMA_LOCATION, true, false, false, XmlRecordFactoryTest.specialDatasourceTypes);
918
	}
919

    
920
	private InputStream load(final String fileName) {
921
		return getClass().getResourceAsStream(fileName);
922
	}
923

    
924
	private InputStream loadFromTransformationProfile(final String profilePath) {
925
		log.info("Loading xslt from: " + basePathProfiles + profilePath);
926
		InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath);
927
		SAXReader saxReader = new SAXReader();
928
		Document doc = null;
929
		try {
930
			doc = saxReader.read(profile);
931
		} catch (DocumentException e) {
932
			e.printStackTrace();
933
			throw new RuntimeException(e);
934
		}
935
		String xslt = doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']").asXML();
936
		//log.info(xslt);
937
		return IOUtils.toInputStream(xslt);
938
	}
939

    
940

    
941
	private Row asRowFromJson(String json) throws ParseException {
942
		Oaf.Builder oafBuilder = Oaf.newBuilder();
943
		JsonFormat.merge(json, oafBuilder);
944
		final Oaf oaf = oafBuilder.build();
945
		Row row = new Row("result", oaf.getEntity().getId());
946
		Column<String, byte[]> c = new Column<>("body", oaf.toByteArray());
947
		row.setColumn("body", c);
948
		return row;
949

    
950
	}
951

    
952
}
    (1-1/1)