Project

General

Profile

1
package eu.dnetlib.data.transform;
2

    
3
import java.io.*;
4
import java.util.*;
5
import java.util.Map.Entry;
6
import java.util.zip.GZIPInputStream;
7
import javax.xml.transform.TransformerConfigurationException;
8
import javax.xml.transform.TransformerFactoryConfigurationError;
9

    
10
import com.google.common.base.Function;
11
import com.google.common.collect.Iterables;
12
import com.google.common.collect.Lists;
13
import com.google.common.collect.Maps;
14
import com.google.common.collect.Sets;
15
import com.google.protobuf.InvalidProtocolBufferException;
16
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
17
import eu.dnetlib.data.mapreduce.util.*;
18
import eu.dnetlib.data.proto.KindProtos.Kind;
19
import eu.dnetlib.data.proto.OafProtos.Oaf;
20
import eu.dnetlib.data.proto.TypeProtos.Type;
21
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
22
import org.apache.commons.io.IOUtils;
23
import org.apache.commons.lang.StringUtils;
24
import org.apache.commons.logging.Log;
25
import org.apache.commons.logging.LogFactory;
26
import org.dom4j.Document;
27
import org.dom4j.DocumentException;
28
import org.dom4j.io.SAXReader;
29
import org.json.JSONObject;
30
import org.junit.Before;
31
import org.junit.Test;
32

    
33
import static org.junit.Assert.*;
34

    
35
public class XsltRowTransformerFactoryTest {
36

    
37
	private static final Log log = LogFactory.getLog(XsltRowTransformerFactoryTest.class);
38
	private static String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/";
39
	private XsltRowTransformerFactory factory;
40
	private EntityConfigTable entityConfigTable;
41

    
42
	@Before
43
	public void setUp() throws Exception {
44
		factory = new XsltRowTransformerFactory();
45
		entityConfigTable = IndexConfig.load(IndexConfigTest.config).getConfigMap();
46
	}
47

    
48
	@Test
49
	public void testParseManyAuthors() throws Exception {
50

    
51
		final Map<String, Object> xslParams = Maps.newHashMap();
52

    
53
		final Map<String, String> m = Maps.newHashMap();
54

    
55
		m.put("od______2367", "true"); // Puma
56
		m.put("od______2294", "true"); // UNIBI
57
		m.put("od________18", "false"); // Arxiv
58

    
59
		xslParams.put("mergeIdForHomonymsMap", m);
60

    
61
		final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordManyAuthors.xml"));
62
		int authorCount = 0;
63
		for (final Row row : rows) {
64
			for (final Column<String, byte[]> col : row.getColumns()) {
65

    
66
				authorCount++;
67
				final OafDecoder d = OafDecoder.decode(col.getValue());
68
				assertNotNull(d);
69

    
70
				log.debug(d.getEntity().getPerson().getCoauthorList().size());
71
			}
72
		}
73
		log.info("authors' count: ---> " + authorCount);
74

    
75
	}
76

    
77
	@Test
78
	public void testParseAuthors() throws Exception {
79

    
80
		final Map<String, Object> xslParams = Maps.newHashMap();
81

    
82
		final Map<String, String> m = Maps.newHashMap();
83

    
84
		m.put("od______2367", "true"); // Puma
85
		m.put("od______2294", "true"); // UNIBI
86
		m.put("od________18", "false"); // Arxiv
87

    
88
		xslParams.put("mergeIdForHomonymsMap", m);
89

    
90
		final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordArxiv.xml"));
91

    
92
		for (final Row row : rows) {
93
			for (final Column<String, byte[]> col : row.getColumns()) {
94

    
95
				final OafDecoder d = OafDecoder.decode(col.getValue());
96
				log.debug(d.getOaf());
97
			}
98
		}
99
	}
100

    
101
	@Test
102
	public void testParseOafClaimUpdate() throws Exception {
103
		doTest(loadFromTransformationProfile("claimUpdates_2_hbase.xsl"), load("recordClaimUpdate.xml"));
104
	}
105

    
106
	@Test
107
	public void testParseClaimUpdate() throws Exception {
108

    
109
		final List<Row> rows = Lists.newArrayList();
110
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaim.xml")));
111
		rows.addAll(asRows(loadFromTransformationProfile("claimUpdates_2_hbase.xsl"), load("recordClaimUpdate.xml")));
112

    
113
		printAll(mapAll(buildTable(rows)));
114
	}
115

    
116
	@Test
117
	public void testParseDatasetPUB() throws Exception {
118

    
119
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatasetPUB.xml"));
120
	}
121

    
122
	@Test
123
	public void testParseClaim() throws Exception {
124

    
125
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaim.xml"));
126
	}
127

    
128
	@Test
129
	public void testParseClaimDataset() throws Exception {
130

    
131
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordClaimDataset.xml"));
132
	}
133

    
134
	@Test
135
	public void testParseACM() throws Exception {
136

    
137
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordACM.xml"));
138
	}
139

    
140
	@Test
141
	public void testParseASB() throws Exception {
142

    
143
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordASB.xml"));
144
	}
145

    
146
	@Test
147
	public void testParseProjectCorda() throws Exception {
148

    
149
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml"));
150
	}
151

    
152
	@Test
153
	public void testParseProjectFCT() throws Exception {
154

    
155
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordFCT.xml"));
156
	}
157

    
158

    
159
	@Test
160
	public void testParseOaf() throws Exception {
161

    
162
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml"));
163
	}
164

    
165
	@Test
166
	public void testParseOafPublication() throws Exception {
167

    
168
		doTest(loadFromTransformationProfile("oaf_publication2hbase.xml"), load("record.xml"));
169
	}
170

    
171
	@Test
172
	public void testParseLindat() throws Exception {
173

    
174
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordLindat.xml"));
175
	}
176

    
177
	@Test
178
	public void testParseDatacite() throws Exception {
179

    
180
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite.xml"));
181
	}
182

    
183
	@Test
184
	public void testParseDatacite2() throws Exception {
185

    
186
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite2.xml"));
187
	}
188

    
189
	@Test
190
	public void testParseOpenTrials() throws Exception {
191

    
192
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("opentrials_datacite1.xml"));
193
	}
194

    
195
	@Test
196
	public void testLinkPangaea() throws Exception {
197

    
198
		final List<Row> rows = Lists.newArrayList();
199
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("pangaeODF1.xml")));
200
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("pangaeODF2.xml")));
201
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("pangaeOAF.xml")));
202
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCordaPangaea.xml")));
203

    
204
		printAll(mapAll(buildTable(rows)));
205
	}
206

    
207
	@Test
208
	public void testPangaea() throws Exception {
209

    
210
		final List<Row> rows = Lists.newArrayList();
211
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("pangaeOAF2.xml")));
212
		printAll(mapAll(buildTable(rows)));
213
	}
214
	@Test
215
	public void testZenodo() throws Exception {
216

    
217
		final List<Row> rows = Lists.newArrayList();
218
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("zenodoData.xml")));
219
		printAll(mapAll(buildTable(rows)));
220
	}
221

    
222
	@Test
223
	public void testLinkCorda() throws Exception {
224

    
225
		final List<Row> rows = Lists.newArrayList();
226
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
227
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordCorda.xml")));
228

    
229
		printAll(mapAll(buildTable(rows)));
230
	}
231

    
232
	@Test
233
	public void testLinkFCT() throws Exception {
234

    
235
		final List<Row> rows = Lists.newArrayList();
236
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordFCT.xml")));
237
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordFCT.xml")));
238

    
239
		printAll(mapAll(buildTable(rows)));
240
	}
241

    
242
	@Test
243
	public void testLinkARC() throws Exception {
244

    
245
		final List<Row> rows = Lists.newArrayList();
246
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordARC.xml")));
247
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordARC.xml")));
248

    
249
		printAll(mapAll(buildTable(rows)));
250
	}
251

    
252
	@Test
253
	public void testLinkWT() throws Exception {
254

    
255
		final List<Row> rows = Lists.newArrayList();
256
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordWT.xml")));
257
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordWT.xml")));
258

    
259
		printAll(mapAll(buildTable(rows)));
260
	}
261

    
262

    
263

    
264
	@Test
265
	public void testLinkOrganization() throws Exception {
266

    
267
		final List<Row> rows = Lists.newArrayList();
268
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organization.xml")));
269
		rows.addAll(asRows(loadFromTransformationProfile("projectorganization_2_hbase.xsl"), load("project_organization.xml")));
270
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
271

    
272
		printAll(mapAll(buildTable(rows)));
273
	}
274

    
275
	@Test
276
	public void testLinkOrganizationAffiliation() throws Exception {
277

    
278
		final List<Row> rows = Lists.newArrayList();
279
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organization.xml")));
280
		rows.addAll(asRows(loadFromTransformationProfile("resultorganization_2_hbase.xsl"), load("result_organization.xml")));
281
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml")));
282

    
283
		printAll(mapAll(buildTable(rows)));
284
	}
285

    
286
	@Test
287
	public void testUnpackAuthors() throws Exception {
288

    
289
		final Map<String, Object> xslParams = Maps.newHashMap();
290

    
291
		final Map<String, String> m = Maps.newHashMap();
292

    
293
		m.put("od______2367", "true"); // Puma
294
		m.put("od______2294", "true"); // UNIBI
295
		m.put("od________18", "false"); // Arxiv
296

    
297
		xslParams.put("mergeIdForHomonymsMap", m);
298

    
299
		final List<Row> rows = Lists.newArrayList();
300
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordArxiv.xml")));
301
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordArxiv2.xml")));
302
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordPuma1.xml")));
303
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordPuma2.xml")));
304
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordUNIBI.xml")));
305

    
306
		printPersonFullnames(mapAll(buildTable(rows)));
307
	}
308

    
309
	@Test
310
	public void testDuplicates() throws Exception {
311
		final String mergeId = "50|dedup_wf_001::08ed625d07e5738b794ff14d6773fd9f";
312
		final List<Row> rows = Lists.newArrayList();
313

    
314
		final Function<Row, Row> f = new Function<Row, Row>() {
315
			@Override
316
			public Row apply(final Row rowIn) {
317

    
318
				final List<Column<String,byte[]>> cols = Lists.newArrayList();
319
				for(Column<String,byte[]> col : rowIn.getColumns()) {
320
					if (col.getName().equals("body")) {
321
						cols.add(new Column(col.getName(), col.getValue()));
322

    
323
					}
324
				}
325
				return new Row("result", rowIn.getKey(), cols);
326
			}
327
		};
328

    
329
		final List<Row> puma1 = asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordPuma1.xml"), f);
330
		puma1.add(new Row("resultResult_dedup_isMergedIn", mergeId));
331

    
332
		final List<Row> puma2 = asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordPuma2.xml"), f);
333
		puma2.add(new Row("resultResult_dedup_isMergedIn", mergeId));
334

    
335
		rows.addAll(puma1);
336
		rows.addAll(puma2);
337

    
338
		List<Oaf> duplicates = Lists.newArrayList();
339
		duplicates.add(getOafBody(puma1));
340
		duplicates.add(getOafBody(puma2));
341
		final Oaf.Builder oafMerge = OafEntityMerger.merge(mergeId, duplicates);
342

    
343
		final Row mergeRow = new Row("result", mergeId, Lists.newArrayList(new Column("body", oafMerge.build().toByteArray())));
344

    
345
		rows.add(mergeRow);
346

    
347
		printAll(mapAll(buildTable(rows)));
348
	}
349

    
350
	private Oaf getOafBody(final List<Row> rows) throws InvalidProtocolBufferException {
351
		for(Row row : rows) {
352
			if(StringUtils.startsWith(row.getKey(), "50")) {
353
				return Oaf.parseFrom(row.getColumn("body").getValue());
354

    
355
			}
356
		}
357
		return null;
358
	}
359

    
360
	@Test
361
	public void testParseDoajOAF() throws Exception {
362

    
363
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("doajUniversityRecord.xml"));
364
	}
365

    
366
	@Test
367
	public void testParseDatasource() throws Exception {
368

    
369
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("datasourceNative.xml"));
370
	}
371
	@Test
372
	public void testParseDatasourcePiwik() throws Exception {
373

    
374
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("datasourcePiwik.xml"));
375
	}
376

    
377
	@Test
378
	public void testParseDataDatasource() throws Exception {
379

    
380
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("dataDatasource.xml"));
381
	}
382

    
383
	@Test
384
	public void testFromMongodbCompressedDump() throws Exception {
385
		doTestJsonGz(loadFromTransformationProfile("oaf2hbase.xml"), load("mdstore_cleaned.json.gz"));
386
	}
387

    
388
	@Test
389
	public void testLoadFromTransformationProfile() throws IOException {
390
		InputStream in = loadFromTransformationProfile("oaf2hbase.xml");
391
		log.info(IOUtils.toString(in));
392
	}
393

    
394
	@Test
395
	public void test_template() throws Exception {
396
		final String xslt = IOUtils.toString(loadFromTransformationProfile("oaf2hbase.xml"));
397
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
398
		assertNotNull(transformer);
399

    
400
		final String record = IOUtils.toString(load("record.xml"));
401
		final List<Row> rows = transformer.apply(record);
402

    
403
		System.out.println(rows);
404
	}
405

    
406
	@Test
407
	public void testWrongCharsOrganization() throws Exception {
408
		final List<Row> rows = Lists.newArrayList();
409
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organizationWrongChars.xml")));
410
		printAll(mapAll(buildTable(rows)));
411
	}
412

    
413
	@Test
414
	public void testParseProjectWithFunderOriginalName() throws Exception {
415

    
416
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectWithFunderOriginalName.xml"));
417
	}
418
	@Test
419
	public void testLinkFunderOriginalName() throws Exception {
420

    
421
		final List<Row> rows = Lists.newArrayList();
422
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectWithFunderOriginalName.xml")));
423
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordFunderOriginalName.xml")));
424

    
425
		printAll(mapAll(buildTable(rows)));
426
	}
427

    
428
	@Test
429
	public void testProjectExtraInfo() throws Exception {
430
		final List<Row> rows = Lists.newArrayList();
431
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordExtraInfo.xml")));
432
		printAll(mapAll(buildTable(rows)));
433
	}
434

    
435
	@Test
436
	public void testParseSoftwareFromODF() throws Exception {
437
		final List<Row> rows = Lists.newArrayList();
438
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("softwareODF.xml")));
439
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
440
		printAll(mapAll(buildTable(rows)));
441
	}
442

    
443
	@Test(expected = AssertionError.class)
444
	public void testParseSoftwareFromOAF() throws Exception {
445
		final List<Row> rows = Lists.newArrayList();
446
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordOAFsoftware.xml")));
447
		printAll(mapAll(buildTable(rows)));
448
	}
449

    
450
	private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception {
451
		try {
452
			final List<Row> rows = asRows(xsltStream, recordStream);
453

    
454
			log.info(rows);
455

    
456
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTable(rows);
457

    
458
			// System.out.println("\n" + table.toString());
459

    
460
			final Map<String, XmlRecordFactory> builders = mapAll(table);
461

    
462
			printAll(builders);
463
		} catch (final InvalidProtocolBufferException e) {
464
			throw new Exception(e);
465
		} catch (final TransformerConfigurationException e) {
466
			throw new Exception(e);
467
		} catch (final TransformerFactoryConfigurationError e) {
468
			throw new Exception(e);
469
		} catch (final DocumentException e) {
470
			throw new Exception(e);
471
		}
472
	}
473

    
474
	private void doTestJsonGz(final InputStream xsltStream, final InputStream recordStream) throws Exception {
475

    
476
		final Iterator<List<Row>> rowsIterator = asRowsJsonGzip(xsltStream, recordStream);
477

    
478
		int i = 0;
479
		while (rowsIterator.hasNext()) {
480
			final List<Row> rows = rowsIterator.next();
481
			i++;
482

    
483
			if ((i % 10000) == 0) {
484
				System.out.println(i);
485
			}
486

    
487
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTableDoaj(rows);
488

    
489
			for (final Map<String, Map<String, byte[]>> m : table.values()) {
490
				for (final Map<String, byte[]> mv : m.values()) {
491
					for (final byte[] v : mv.values()) {
492
						final OafDecoder d = OafDecoder.decode(v);
493
						assertNotNull(d);
494
						assertNotNull(d.getOaf());
495

    
496
						switch (d.getKind()) {
497
						case entity:
498
							assertNotNull(d.getMetadata());
499
							if (d.getOaf().getEntity().getType().equals(Type.result)) {
500
								System.out.println(d.getOaf());
501
							}
502
							break;
503
						case relation:
504
							assertNotNull(d.getRel());
505
							break;
506
						default:
507
							break;
508
						}
509
					}
510
				}
511
			}
512
		}
513
	}
514

    
515
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream, final Function<Row, Row> p) throws Exception {
516
		return asRows(xsltStream, new HashMap<>(), recordStream, p);
517
	}
518

    
519
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream) throws Exception {
520
		return asRows(xsltStream, new HashMap<>(), recordStream);
521
	}
522

    
523
	private List<Row> asRows(final InputStream xsltStream, final Map<String, Object> params, final InputStream recordStream) throws Exception {
524
		return asRows(xsltStream, params, recordStream, null);
525
	}
526

    
527
	private List<Row> asRows(final InputStream xsltStream, final Map<String, Object> params, final InputStream recordStream, final Function<Row, Row> p) throws Exception {
528
		final String xslt = IOUtils.toString(xsltStream);
529
		final XsltRowTransformer transformer = factory.getTransformer(xslt, params);
530
		assertNotNull(transformer);
531

    
532
		final String record = IOUtils.toString(recordStream);
533
		final List<Row> rows = transformer.apply(record);
534

    
535
		assertNotNull(rows);
536
		assertFalse(rows.isEmpty());
537
		return p == null ? rows : Lists.newArrayList(Iterables.transform(rows, p));
538
	}
539

    
540
	private Iterator<List<Row>> asRowsJsonGzip(final InputStream xsltStream, final InputStream recordStreamJsonGzip) throws Exception {
541
		final String xslt = IOUtils.toString(xsltStream);
542
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
543
		assertNotNull(transformer);
544
		assertNotNull(recordStreamJsonGzip);
545

    
546
		final GZIPInputStream stream = new GZIPInputStream(recordStreamJsonGzip);
547
		assertNotNull(stream);
548
		final BufferedReader inStream = new BufferedReader(new InputStreamReader(stream));
549
		assertNotNull(inStream);
550
		return new Iterator<List<Row>>() {
551

    
552
			String jsonRecord = null;
553

    
554
			@Override
555
			public boolean hasNext() {
556
				try {
557
					return (jsonRecord = inStream.readLine()) != null;
558
				} catch (final IOException e) {
559
					throw new RuntimeException(e);
560
				}
561
			}
562

    
563
			@Override
564
			public List<Row> next() {
565

    
566
				final JSONObject jsonObj = new JSONObject(jsonRecord);
567
				final String body = jsonObj.getString("body");
568
				try {
569
					assertTrue(StringUtils.isNotBlank(body));
570
					// System.out.println(body);
571
					final List<Row> rows = transformer.apply(body);
572
					assertNotNull(rows);
573
					assertFalse(rows.isEmpty());
574
					return rows;
575
				} catch (final Throwable e) {
576
					System.err.println("error transforming document: " + body);
577
					throw new RuntimeException(e);
578
				}
579
			}
580

    
581
			@Override
582
			public void remove() {
583
				throw new UnsupportedOperationException();
584
			}
585

    
586
		};
587

    
588
	}
589

    
590
	private Map<String, Map<String, Map<String, byte[]>>> buildTableDoaj(final List<Row> rows) throws UnsupportedEncodingException {
591
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
592

    
593
		for (final Row row : rows) {
594
			final String rowKey = row.getKey();
595
			final String cf = row.getColumnFamily();
596
			if (!table.containsKey(rowKey)) {
597
				table.put(rowKey, new HashMap<>());
598
			}
599
			if (!table.get(rowKey).containsKey(cf)) {
600
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<>());
601
			}
602
			for (final Column<String, byte[]> c : row.getColumns()) {
603
				// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
604
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
605
				if (cf.equals("result") && c.getName().equals("body")) {
606
					// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
607
					assertTrue(StringUtils.isNotBlank(new String(c.getValue(), "UTF-8")));
608
				}
609
			}
610
		}
611
		return table;
612

    
613
	}
614

    
615
	private Map<String, Map<String, Map<String, byte[]>>> buildTable(final List<Row> rows) throws UnsupportedEncodingException {
616
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
617

    
618
		for (final Row row : rows) {
619
			final String rowKey = row.getKey();
620
			final String cf = row.getColumnFamily();
621
			if (!table.containsKey(rowKey)) {
622
				table.put(rowKey, new HashMap<String, Map<String, byte[]>>());
623
			}
624
			if (!table.get(rowKey).containsKey(cf)) {
625
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<String, byte[]>());
626
			}
627
			for (final Column<String, byte[]> c : row.getColumns()) {
628
				System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
629
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
630
				if (c.getName().equals("body")) {
631
					final String theBody = new String(c.getValue(), "UTF-8");
632
					assertTrue(StringUtils.isNotBlank(theBody));
633
					//System.out.println(theBody);
634
				}
635
			}
636
		}
637
		return table;
638

    
639
	}
640

    
641
	private Map<String, XmlRecordFactory> mapAll(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
642

    
643
		final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
644
		for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
645
			map(builders, e.getKey(), e.getValue());
646
		}
647
		return builders;
648
	}
649

    
650
	// private Map<String, XmlRecordFactory> mapResultsOnly(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
651
	//
652
	// final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
653
	// for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
654
	// final Type type = OafRowKeyDecoder.decode(e.getKey()).getType();
655
	// if (type == Type.result) {
656
	// map(builders, e.getKey(), e.getValue());
657
	// }
658
	// }
659
	// return builders;
660
	// }
661

    
662
	private void map(final Map<String, XmlRecordFactory> builders, final String rowKey, final Map<String, Map<String, byte[]>> row) throws Exception {
663

    
664
		final Type type = OafRowKeyDecoder.decode(rowKey).getType();
665

    
666
		final Map<String, byte[]> familyMap = row.get(type.toString());
667

    
668
		if (familyMap == null) return;
669

    
670
		final byte[] bodyB = familyMap.get("body");
671

    
672
		if (bodyB != null) {
673
			ensureBuilder(builders, rowKey);
674

    
675
			final Oaf oaf = UpdateMerger.mergeBodyUpdates(familyMap);
676

    
677
			final OafDecoder mainEntity = OafDecoder.decode(oaf);
678

    
679
			builders.get(rowKey).setMainEntity(mainEntity);
680

    
681
			for (final LinkDescriptor ld : entityConfigTable.getDescriptors(type)) {
682

    
683
				final String it = ld.getRelDescriptor().getIt();
684
				final Map<String, byte[]> cols = row.get(it);
685

    
686
				if ((cols != null) && !cols.isEmpty()) {
687

    
688
					for (final byte[] oafB : cols.values()) {
689

    
690
						final Oaf.Builder relBuilder = Oaf.newBuilder(Oaf.parseFrom(oafB));
691

    
692
						if (ld.isSymmetric()) {
693
							final RelDescriptor rd = ld.getRelDescriptor();
694

    
695
							relBuilder.getRelBuilder().setCachedTarget(mainEntity.getEntity()).setRelType(rd.getRelType()).setSubRelType(rd.getSubRelType());
696
						}
697

    
698
						relBuilder.getRelBuilder().setChild(ld.isChild());
699

    
700
						final Oaf.Builder oafBuilder = Oaf.newBuilder().setKind(Kind.relation).setLastupdatetimestamp(System.currentTimeMillis());
701
						oafBuilder.mergeFrom(relBuilder.build());
702

    
703
						final String targetId = ld.isSymmetric() ? oafBuilder.getRel().getTarget() : oafBuilder.getRel().getSource();
704
						ensureBuilder(builders, targetId);
705
						final OafDecoder decoder = OafDecoder.decode(oafBuilder.build());
706

    
707
						if (ld.isChild()) {
708
							builders.get(targetId).addChild(type, decoder);
709
						} else {
710
							builders.get(targetId).addRelation(type, decoder);
711
						}
712
					}
713

    
714
				}
715
			}
716
		}
717

    
718
	}
719

    
720
	private void printAll(final Map<String, XmlRecordFactory> builders) throws DocumentException {
721
		print(Sets.newHashSet(Type.values()), builders, null);
722
	}
723

    
724
	private void printPerson(final Map<String, XmlRecordFactory> builders) throws DocumentException {
725
		print(Sets.newHashSet(Type.person), builders, null);
726
	}
727

    
728
	private void printPersonFullnames(final Map<String, XmlRecordFactory> builders) throws DocumentException {
729

    
730
		final Map<Type, Set<String>> xpaths = Maps.newHashMap();
731

    
732
		final Set<String> personPaths = Sets.newHashSet();
733

    
734
		personPaths.add("//fullname");
735

    
736
		xpaths.put(Type.person, personPaths);
737

    
738
		print(Sets.newHashSet(Type.person), builders, xpaths);
739
	}
740

    
741
	private void print(final Set<Type> types, final Map<String, XmlRecordFactory> builders, final Map<Type, Set<String>> xpaths) throws DocumentException {
742
		final SAXReader r = new SAXReader();
743

    
744
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
745
			final OafRowKeyDecoder kd = OafRowKeyDecoder.decode(e.getKey());
746

    
747
			if (!e.getValue().isValid()) throw new IllegalArgumentException("invalid builder: " + e.getKey());
748
			if (types.contains(kd.getType())) {
749
				final String val = IndentXmlString.apply(e.getValue().build());
750

    
751
				if ((xpaths != null) && !xpaths.isEmpty() && (xpaths.get(kd.getType()) != null)) {
752
					final Document doc = r.read(new StringReader(val));
753

    
754
					log.debug("\n" + e.getKey());
755
					for (final String xpath : xpaths.get(kd.getType())) {
756
						log.debug(doc.valueOf(xpath));
757
					}
758
				} else {
759
					log.info(val);
760
				}
761
			}
762
		}
763
	}
764

    
765
	private void printNoIndent(final Map<String, XmlRecordFactory> builders) {
766
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
767
			if (e.getValue().isValid()) {
768
				log.debug(e.getValue().build());
769
			} else {
770
				log.debug("invalid builder: " + e.getKey());
771
			}
772
		}
773
	}
774

    
775
	private void ensureBuilder(final Map<String, XmlRecordFactory> builders, final String rowKey) throws Exception {
776
		if (!builders.containsKey(rowKey)) {
777
			builders.put(rowKey, newBuilder());
778
		}
779
	}
780

    
781
	private XmlRecordFactory newBuilder() throws TransformerConfigurationException, TransformerFactoryConfigurationError, DocumentException {
782
		return new XmlRecordFactory(entityConfigTable, ContextMapper.fromXml(Context.xml),
783
				RelClasses.fromJSon(RelClassesTest.relClassesJson), XmlRecordFactoryTest.SCHEMA_LOCATION, true, false, false, XmlRecordFactoryTest.specialDatasourceTypes);
784
	}
785

    
786
	private InputStream load(final String fileName) {
787
		return getClass().getResourceAsStream(fileName);
788
	}
789

    
790
	private InputStream loadFromTransformationProfile(final String profilePath) {
791
		log.info("Loading xslt from: " + basePathProfiles + profilePath);
792
		InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath);
793
		SAXReader saxReader = new SAXReader();
794
		Document doc = null;
795
		try {
796
			doc = saxReader.read(profile);
797
		} catch (DocumentException e) {
798
			e.printStackTrace();
799
			throw new RuntimeException(e);
800
		}
801
		String xslt = doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']").asXML();
802
		return IOUtils.toInputStream(xslt);
803
	}
804

    
805
}
    (1-1/1)