package eu.dnetlib.data.transform;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.Map.Entry;
import java.util.zip.GZIPInputStream;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactoryConfigurationError;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.protobuf.ExtensionRegistry;
import com.google.protobuf.InvalidProtocolBufferException;
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
import eu.dnetlib.data.mapreduce.util.*;
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
import eu.dnetlib.data.proto.DliFieldTypeProtos;
import eu.dnetlib.data.proto.DliProtos;
import eu.dnetlib.data.proto.DliRels;
import eu.dnetlib.data.proto.KindProtos.Kind;
import eu.dnetlib.data.proto.TypeProtos.Type;
import eu.dnetlib.miscutils.functional.xml.XMLIndenter;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.json.JSONObject;
import org.junit.Before;
import org.junit.Test;

import static org.junit.Assert.*;

public class XsltRowTransformerFactoryTest {
38

    
39
	private static final Log log = LogFactory.getLog(XsltRowTransformerFactoryTest.class);
40
	private static String basePathProfiles = "/eu/dnetlib/bootstrap/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/";
41
	private XsltRowTransformerFactory factory;
42
	private EntityConfigTable entityConfigTable;
43

    
44
	@Before
45
	public void setUp() throws Exception {
46
		factory = new XsltRowTransformerFactory();
47
		entityConfigTable = IndexConfig.load(IndexConfigTest.config).getConfigMap();
48
	}
49

    
50
	@Test
51
	public void testParseManyAuthors() throws Exception {
52

    
53
		final Map<String, Object> xslParams = Maps.newHashMap();
54

    
55
		final Map<String, String> m = Maps.newHashMap();
56

    
57
		m.put("od______2367", "true"); // Puma
58
		m.put("od______2294", "true"); // UNIBI
59
		m.put("od________18", "false"); // Arxiv
60

    
61
		xslParams.put("mergeIdForHomonymsMap", m);
62

    
63
		final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordManyAuthors.xml"));
64
		int authorCount = 0;
65
		for (final Row row : rows) {
66
			for (final Column<String, byte[]> col : row.getColumns()) {
67

    
68
				authorCount++;
69
				final DNGFDecoder d = DNGFDecoder.decode(col.getValue());
70
				assertNotNull(d);
71

    
72
				log.debug(d.getEntity().getPerson().getCoauthorList().size());
73
			}
74
		}
75
		log.info("authors' count: ---> " + authorCount);
76

    
77
	}
78

    
79
	@Test
80
	public void testParseAuthors() throws Exception {
81

    
82
		final Map<String, Object> xslParams = Maps.newHashMap();
83

    
84
		final Map<String, String> m = Maps.newHashMap();
85

    
86
		m.put("od______2367", "true"); // Puma
87
		m.put("od______2294", "true"); // UNIBI
88
		m.put("od________18", "false"); // Arxiv
89

    
90
		xslParams.put("mergeIdForHomonymsMap", m);
91

    
92
		final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordArxiv.xml"));
93

    
94
		for (final Row row : rows) {
95
			for (final Column<String, byte[]> col : row.getColumns()) {
96

    
97
				final DNGFDecoder d = DNGFDecoder.decode(col.getValue());
98
				log.debug(d.getDNGF());
99
			}
100
		}
101
	}
102

    
103
	@Test
104
	public void testParseDNGFClaimUpdate() throws Exception {
105
		doTest(loadFromTransformationProfile("claimUpdates_2_hbase.xml"), load("recordClaimUpdate.xml"));
106
	}
107

    
108
	@Test
109
	public void testParseClaimUpdate() throws Exception {
110

    
111
		final List<Row> rows = Lists.newArrayList();
112
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), load("recordClaim.xml")));
113
		rows.addAll(asRows(loadFromTransformationProfile("claimUpdates_2_hbase.xml"), load("recordClaimUpdate.xml")));
114

    
115
		printAll(mapAll(buildTable(rows)));
116
	}
117

    
118
	@Test
119
	public void testParseDatasetPUB() throws Exception {
120

    
121
		doTest(loadFromTransformationProfile("dmf2hbase.xml"), load("recordDatasetPUB.xml"));
122
	}
123

    
124
	@Test
125
	public void testParseClaim() throws Exception {
126

    
127
		doTest(loadFromTransformationProfile("pmf2hbase.xml"), load("recordClaim.xml"));
128
	}
129

    
130
	@Test
131
	public void testParseClaimDataset() throws Exception {
132

    
133
		doTest(loadFromTransformationProfile("dmf2hbase.xml"), load("recordClaimDataset.xml"));
134
	}
135

    
136
	@Test
137
	public void testParseACM() throws Exception {
138

    
139
		doTest(loadFromTransformationProfile("pmf2hbase.xml"), load("recordACM.xml"));
140
	}
141

    
142
	@Test
143
	public void testParseASB() throws Exception {
144

    
145
		doTest(loadFromTransformationProfile("pmf2hbase.xml"), load("recordASB.xml"));
146
	}
147

    
148
	@Test
149
	public void testParseProjectCorda() throws Exception {
150

    
151
		doTest(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordCorda.xml"));
152
	}
153

    
154
	@Test
155
	public void testParseProjectFCT() throws Exception {
156

    
157
		doTest(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordFCT.xml"));
158
	}
159

    
160
	@Test
161
	public void testParseDNGF() throws Exception {
162

    
163
		doTest(loadFromTransformationProfile("pmf2hbase.xml"), load("record.xml"));
164
	}
165

    
166
	@Test
167
	public void testParseDNGFPublication() throws Exception {
168

    
169
		doTest(loadFromTransformationProfile("oaf_publication2hbase.xml"), load("record.xml"));
170
	}
171

    
172
	@Test
173
	public void testParseWDSDaaaatacite() throws Exception {
174

    
175
		doTest(loadFromTransformationProfile("dmfwds2hbase.xml"), load("recordWds.xml"));
176
	}
177

    
178

    
179
	@Test
180
	public void testParseDLIDatacite() throws Exception {
181

    
182
		doTest(loadFromTransformationProfile("dmfdli2hbase.xml"), load("record_dli_dmf.xml"));
183
	}
184

    
185
	@Test
186
	public void testParseDLIPMF() throws Exception {
187

    
188
		final List<Row> rows = Lists.newArrayList();
189
		rows.addAll(asRows(loadFromTransformationProfile("pmfdli2hbase.xml"), load("record_dli_pmf.xml")));
190

    
191
		final ExtensionRegistry r = ExtensionRegistry.newInstance();
192

    
193
		rows.forEach(row -> {
194
			row.getColumns().forEach(result -> {
195
						if (result != null) {
196
							final DNGFDecoder decoder =
197
									DNGFDecoder.decode(result.getValue(), DliFieldTypeProtos.completionStatus, DliProtos.resolvedfrom, DliProtos.completionStatus,
198
											DliRels.isRelatedTo);
199

    
200
							System.out.println("decoder.getDNGF().toString() = " + decoder.getDNGF().toString());
201
						}
202
					}
203
			);
204
		});
205
	}
206

    
207
	@Test
208
	public void testParseDatacite() throws Exception {
209

    
210
		doTest(loadFromTransformationProfile("dmf2hbase.xml"), load("recordDatacite.xml"));
211
	}
212

    
213
	@Test
214
	public void testParseDatacite2() throws Exception {
215

    
216
		doTest(loadFromTransformationProfile("dmf2hbase.xml"), load("recordDatacite2.xml"));
217
	}
218

    
219
	@Test
220
	public void testParseOpenTrials() throws Exception {
221

    
222
		doTest(loadFromTransformationProfile("dmf2hbase.xml"), load("opentrials_datacite1.xml"));
223
	}
224

    
225
	@Test
226
	public void testLinkPangaea() throws Exception {
227

    
228
		final List<Row> rows = Lists.newArrayList();
229
		rows.addAll(asRows(loadFromTransformationProfile("dmf2hbase.xml"), load("pangaeODF1.xml")));
230
		rows.addAll(asRows(loadFromTransformationProfile("dmf2hbase.xml"), load("pangaeODF2.xml")));
231
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), load("pangaeOAF.xml")));
232
		rows.addAll(asRows(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordCordaPangaea.xml")));
233

    
234
		printAll(mapAll(buildTable(rows)));
235
	}
236

    
237
	@Test
238
	public void testLinkCorda() throws Exception {
239

    
240
		final List<Row> rows = Lists.newArrayList();
241
		rows.addAll(asRows(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordCorda.xml")));
242
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), load("recordCorda.xml")));
243

    
244
		printAll(mapAll(buildTable(rows)));
245
	}
246

    
247
	@Test
248
	public void testLinkFCT() throws Exception {
249

    
250
		final List<Row> rows = Lists.newArrayList();
251
		rows.addAll(asRows(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordFCT.xml")));
252
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), load("recordFCT.xml")));
253

    
254
		printAll(mapAll(buildTable(rows)));
255
	}
256

    
257
	@Test
258
	public void testLinkARC() throws Exception {
259

    
260
		final List<Row> rows = Lists.newArrayList();
261
		rows.addAll(asRows(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordARC.xml")));
262
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), load("recordARC.xml")));
263

    
264
		printAll(mapAll(buildTable(rows)));
265
	}
266

    
267
	@Test
268
	public void testLinkWT() throws Exception {
269

    
270
		final List<Row> rows = Lists.newArrayList();
271
		rows.addAll(asRows(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordWT.xml")));
272
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), load("recordWT.xml")));
273

    
274
		printAll(mapAll(buildTable(rows)));
275
	}
276

    
277
	@Test
278
	public void testLinkOrganization() throws Exception {
279

    
280
		final List<Row> rows = Lists.newArrayList();
281
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xml"), load("organization.xml")));
282
		rows.addAll(asRows(loadFromTransformationProfile("projectorganization_2_hbase.xml"), load("project_organization.xml")));
283
		rows.addAll(asRows(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordCorda.xml")));
284

    
285
		printAll(mapAll(buildTable(rows)));
286
	}
287

    
288
	@Test
289
	public void testUnpackAuthors() throws Exception {
290

    
291
		final Map<String, Object> xslParams = Maps.newHashMap();
292

    
293
		final Map<String, String> m = Maps.newHashMap();
294

    
295
		m.put("od______2367", "true"); // Puma
296
		m.put("od______2294", "true"); // UNIBI
297
		m.put("od________18", "false"); // Arxiv
298

    
299
		xslParams.put("mergeIdForHomonymsMap", m);
300

    
301
		final List<Row> rows = Lists.newArrayList();
302
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), xslParams, load("recordArxiv.xml")));
303
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), xslParams, load("recordArxiv2.xml")));
304
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), xslParams, load("recordPuma1.xml")));
305
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), xslParams, load("recordPuma2.xml")));
306
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), xslParams, load("recordUNIBI.xml")));
307

    
308
		printPersonFullnames(mapAll(buildTable(rows)));
309
	}
310

    
311
	@Test
312
	public void testParseDoajOAF() throws Exception {
313

    
314
		doTest(loadFromTransformationProfile("pmf2hbase.xml"), load("doajUniversityRecord.xml"));
315
	}
316

    
317
	@Test
318
	public void testParseDatasource() throws Exception {
319

    
320
		doTest(loadFromTransformationProfile("datasources_2_hbase.xml"), load("datasourceNative.xml"));
321
	}
322

    
323
	@Test
324
	public void testParseDataDatasource() throws Exception {
325

    
326
		doTest(loadFromTransformationProfile("datasources_2_hbase.xml"), load("dataDatasource.xml"));
327
	}
328

    
329
	@Test
330
	public void testFromMongodbCompressedDump() throws Exception {
331
		doTestJsonGz(loadFromTransformationProfile("pmf2hbase.xml"), load("mdstore_cleaned.json.gz"));
332
	}
333

    
334
	@Test
335
	public void testLoadFromTransformationProfile() throws IOException {
336
		InputStream in = loadFromTransformationProfile("pmf2hbase.xml");
337
		log.info(IOUtils.toString(in));
338
	}
339

    
340
	@Test
341
	public void test_template() throws Exception {
342
		final String xslt = IOUtils.toString(loadFromTransformationProfile("pmf2hbase.xml"));
343
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
344
		assertNotNull(transformer);
345

    
346
		final String record = IOUtils.toString(load("record.xml"));
347
		final List<Row> rows = transformer.apply(record);
348

    
349
		System.out.println(rows);
350
	}
351

    
352
	@Test
353
	public void testWrongCharsOrganization() throws Exception {
354
		final List<Row> rows = Lists.newArrayList();
355
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xml"), load("organizationWrongChars.xml")));
356
		printAll(mapAll(buildTable(rows)));
357
	}
358

    
359
	private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception {
360
		try {
361
			final List<Row> rows = asRows(xsltStream, recordStream);
362

    
363
			log.info(rows);
364

    
365
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTable(rows);
366

    
367
			//System.out.println("\n" + table.toString());
368

    
369
			final Map<String, XmlRecordFactory> builders = mapAll(table);
370

    
371
			printAll(builders);
372
		} catch (final InvalidProtocolBufferException e) {
373
			throw new Exception(e);
374
		} catch (final TransformerConfigurationException e) {
375
			throw new Exception(e);
376
		} catch (final TransformerFactoryConfigurationError e) {
377
			throw new Exception(e);
378
		} catch (final DocumentException e) {
379
			throw new Exception(e);
380
		}
381
	}
382

    
383
	private void doTestJsonGz(final InputStream xsltStream, final InputStream recordStream) throws Exception {
384

    
385
		final Iterator<List<Row>> rowsIterator = asRowsJsonGzip(xsltStream, recordStream);
386

    
387
		int i = 0;
388
		while (rowsIterator.hasNext()) {
389
			final List<Row> rows = rowsIterator.next();
390
			i++;
391

    
392
			if ((i % 10000) == 0) {
393
				System.out.println(i);
394
			}
395

    
396
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTableDoaj(rows);
397

    
398
			for (final Map<String, Map<String, byte[]>> m : table.values()) {
399
				for (final Map<String, byte[]> mv : m.values()) {
400
					for (final byte[] v : mv.values()) {
401
						final DNGFDecoder d = DNGFDecoder.decode(v);
402
						assertNotNull(d);
403
						assertNotNull(d.getDNGF());
404

    
405
						switch (d.getKind()) {
406
						case entity:
407
							assertNotNull(d.getMetadata());
408
							final Type type = d.getDNGF().getEntity().getType();
409
							if (type.equals(Type.publication) || type.equals(Type.dataset)) {
410
								System.out.println(d.getDNGF());
411
							}
412
							break;
413
						case relation:
414
							assertNotNull(d.getRel());
415
							break;
416
						default:
417
							break;
418
						}
419
					}
420
				}
421
			}
422
		}
423
	}
424

    
425
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream) throws Exception {
426
		return asRows(xsltStream, new HashMap<String, Object>(), recordStream);
427
	}
428

    
429
	private List<Row> asRows(final InputStream xsltStream, final Map<String, Object> params, final InputStream recordStream) throws Exception {
430
		final String xslt = IOUtils.toString(xsltStream);
431
		final XsltRowTransformer transformer = factory.getTransformer(xslt, params);
432
		assertNotNull(transformer);
433

    
434
		final String record = IOUtils.toString(recordStream);
435
		final List<Row> rows = transformer.apply(record);
436

    
437
		assertNotNull(rows);
438
		assertFalse(rows.isEmpty());
439
		return rows;
440
	}
441

    
442
	private Iterator<List<Row>> asRowsJsonGzip(final InputStream xsltStream, final InputStream recordStreamJsonGzip) throws Exception {
443
		final String xslt = IOUtils.toString(xsltStream);
444
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
445
		assertNotNull(transformer);
446
		assertNotNull(recordStreamJsonGzip);
447

    
448
		final GZIPInputStream stream = new GZIPInputStream(recordStreamJsonGzip);
449
		assertNotNull(stream);
450
		final BufferedReader inStream = new BufferedReader(new InputStreamReader(stream));
451
		assertNotNull(inStream);
452
		return new Iterator<List<Row>>() {
453

    
454
			String jsonRecord = null;
455

    
456
			@Override
457
			public boolean hasNext() {
458
				try {
459
					return (jsonRecord = inStream.readLine()) != null;
460
				} catch (final IOException e) {
461
					throw new RuntimeException(e);
462
				}
463
			}
464

    
465
			@Override
466
			public List<Row> next() {
467

    
468
				final JSONObject jsonObj = new JSONObject(jsonRecord);
469
				final String body = jsonObj.getString("body");
470
				try {
471
					assertTrue(StringUtils.isNotBlank(body));
472
					// System.out.println(body);
473
					final List<Row> rows = transformer.apply(body);
474
					assertNotNull(rows);
475
					assertFalse(rows.isEmpty());
476
					return rows;
477
				} catch (final Throwable e) {
478
					System.err.println("error transforming document: " + body);
479
					throw new RuntimeException(e);
480
				}
481
			}
482

    
483
			@Override
484
			public void remove() {
485
				throw new UnsupportedOperationException();
486
			}
487

    
488
		};
489

    
490
	}
491

    
492
	private Map<String, Map<String, Map<String, byte[]>>> buildTableDoaj(final List<Row> rows) throws UnsupportedEncodingException {
493
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
494

    
495
		for (final Row row : rows) {
496
			final String rowKey = row.getKey();
497
			final String cf = row.getColumnFamily();
498
			if (!table.containsKey(rowKey)) {
499
				table.put(rowKey, new HashMap<String, Map<String, byte[]>>());
500
			}
501
			if (!table.get(rowKey).containsKey(cf)) {
502
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<String, byte[]>());
503
			}
504
			for (final Column<String, byte[]> c : row.getColumns()) {
505
				// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
506
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
507
				if (cf.equals("result") && c.getName().equals("body")) {
508
					// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
509
					assertTrue(StringUtils.isNotBlank(new String(c.getValue(), "UTF-8")));
510
				}
511
			}
512
		}
513
		return table;
514

    
515
	}
516

    
517
	private Map<String, Map<String, Map<String, byte[]>>> buildTable(final List<Row> rows) throws UnsupportedEncodingException {
518
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
519

    
520
		for (final Row row : rows) {
521
			final String rowKey = row.getKey();
522
			final String cf = row.getColumnFamily();
523
			if (!table.containsKey(rowKey)) {
524
				table.put(rowKey, new HashMap<String, Map<String, byte[]>>());
525
			}
526
			if (!table.get(rowKey).containsKey(cf)) {
527
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<String, byte[]>());
528
			}
529
			for (final Column<String, byte[]> c : row.getColumns()) {
530
				System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
531
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
532
				if (c.getName().equals("body")) {
533
					final String theBody = new String(c.getValue(), "UTF-8");
534
					assertTrue(StringUtils.isNotBlank(theBody));
535
					// System.out.println(theBody);
536
				}
537
			}
538
		}
539
		return table;
540

    
541
	}
542

    
543
	private Map<String, XmlRecordFactory> mapAll(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
544

    
545
		final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
546
		for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
547
			map(builders, e.getKey(), e.getValue());
548
		}
549
		return builders;
550
	}
551

    
552
	// private Map<String, XmlRecordFactory> mapResultsOnly(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
553
	//
554
	// final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
555
	// for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
556
	// final Type type = DNGFRowKeyDecoder.decode(e.getKey()).getType();
557
	// if (type == Type.result) {
558
	// map(builders, e.getKey(), e.getValue());
559
	// }
560
	// }
561
	// return builders;
562
	// }
563

    
564
	private void map(final Map<String, XmlRecordFactory> builders, final String rowKey, final Map<String, Map<String, byte[]>> row) throws Exception {
565

    
566
		final Type type = DNGFRowKeyDecoder.decode(rowKey).getType();
567

    
568
		final Map<String, byte[]> familyMap = row.get(type.toString());
569

    
570
		if (familyMap == null) return;
571

    
572
		final byte[] bodyB = familyMap.get("body");
573

    
574
		if (bodyB != null) {
575
			ensureBuilder(builders, rowKey);
576

    
577
			final DNGF oaf = UpdateMerger.mergeBodyUpdates(familyMap);
578

    
579
			final DNGFDecoder mainEntity = DNGFDecoder.decode(oaf);
580

    
581
			builders.get(rowKey).setMainEntity(mainEntity);
582

    
583
			for (final LinkDescriptor ld : entityConfigTable.getDescriptors(type)) {
584

    
585
				final String it = ld.getRelDescriptor().getIt();
586
				final Map<String, byte[]> cols = row.get(it);
587

    
588
				if ((cols != null) && !cols.isEmpty()) {
589

    
590
					for (final byte[] oafB : cols.values()) {
591

    
592
						final DNGF.Builder relBuilder = DNGF.newBuilder(DNGF.parseFrom(oafB));
593

    
594
						if (ld.isSymmetric()) {
595
							final RelDescriptor rd = ld.getRelDescriptor();
596

    
597
							relBuilder.getRelBuilder().setCachedTarget(mainEntity.getEntity()).setRelType(rd.getRelType()).setSubRelType(rd.getSubRelType());
598
						}
599

    
600
						relBuilder.getRelBuilder().setChild(ld.isChild());
601

    
602
						final DNGF.Builder oafBuilder = DNGF.newBuilder().setKind(Kind.relation).setLastupdatetimestamp(System.currentTimeMillis());
603
						oafBuilder.mergeFrom(relBuilder.build());
604

    
605
						final String targetId = ld.isSymmetric() ? oafBuilder.getRel().getTarget() : oafBuilder.getRel().getSource();
606
						ensureBuilder(builders, targetId);
607
						final DNGFDecoder decoder = DNGFDecoder.decode(oafBuilder.build());
608

    
609
						if (ld.isChild()) {
610
							builders.get(targetId).addChild(type, decoder);
611
						} else {
612
							builders.get(targetId).addRelation(type, decoder);
613
						}
614
					}
615

    
616
				}
617
			}
618
		}
619

    
620
	}
621

    
622
	private void printAll(final Map<String, XmlRecordFactory> builders) throws DocumentException {
623
		print(Sets.newHashSet(Type.values()), builders, null);
624
	}
625

    
626
	private void printPerson(final Map<String, XmlRecordFactory> builders) throws DocumentException {
627
		print(Sets.newHashSet(Type.person), builders, null);
628
	}
629

    
630
	private void printPersonFullnames(final Map<String, XmlRecordFactory> builders) throws DocumentException {
631

    
632
		final Map<Type, Set<String>> xpaths = Maps.newHashMap();
633

    
634
		final Set<String> personPaths = Sets.newHashSet();
635

    
636
		personPaths.add("//fullname");
637

    
638
		xpaths.put(Type.person, personPaths);
639

    
640
		print(Sets.newHashSet(Type.person), builders, xpaths);
641
	}
642

    
643
	private void print(final Set<Type> types, final Map<String, XmlRecordFactory> builders, final Map<Type, Set<String>> xpaths) throws DocumentException {
644
		final SAXReader r = new SAXReader();
645

    
646
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
647
			final DNGFRowKeyDecoder kd = DNGFRowKeyDecoder.decode(e.getKey());
648

    
649
			if (!e.getValue().isValid()) throw new IllegalArgumentException("invalid builder: " + e.getKey());
650
			if (types.contains(kd.getType())) {
651
				final String val = XMLIndenter.indent(e.getValue().build());
652

    
653
				if ((xpaths != null) && !xpaths.isEmpty() && (xpaths.get(kd.getType()) != null)) {
654
					final Document doc = r.read(new StringReader(val));
655

    
656
					log.debug("\n" + e.getKey());
657
					for (final String xpath : xpaths.get(kd.getType())) {
658
						log.debug(doc.valueOf(xpath));
659
					}
660
				} else {
661

    
662
					log.info(val);
663
				}
664
			}
665
		}
666
	}
667

    
668
	private void printNoIndent(final Map<String, XmlRecordFactory> builders) {
669
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
670
			if (e.getValue().isValid()) {
671
				log.debug(e.getValue().build());
672
			} else {
673
				log.debug("invalid builder: " + e.getKey());
674
			}
675
		}
676
	}
677

    
678
	private void ensureBuilder(final Map<String, XmlRecordFactory> builders, final String rowKey) throws Exception {
679
		if (!builders.containsKey(rowKey)) {
680
			builders.put(rowKey, newBuilder());
681
		}
682
	}
683

    
684
	private XmlRecordFactory newBuilder() throws TransformerConfigurationException, TransformerFactoryConfigurationError, DocumentException {
685
		return new XmlRecordFactory(entityConfigTable, ContextMapper.fromXml(eu.dnetlib.data.mapreduce.hbase.index.config.Context.xml),
686
				RelClasses.fromJSon(RelClassesTest.relClassesJson), XmlRecordFactoryTest.SCHEMA_LOCATION, true, true, false);
687
	}
688

    
689
	private InputStream load(final String fileName) {
690
		return getClass().getResourceAsStream(fileName);
691
	}
692

    
693
	private InputStream loadFromTransformationProfile(final String profilePath) {
694
		log.info("Loading xslt from: " + basePathProfiles + profilePath);
695
		InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath);
696
		SAXReader saxReader = new SAXReader();
697
		Document doc = null;
698
		try {
699
			doc = saxReader.read(profile);
700
		} catch (DocumentException e) {
701
			e.printStackTrace();
702
			throw new RuntimeException(e);
703
		}
704
		String xslt = doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']").asXML();
705
		return IOUtils.toInputStream(xslt);
706
	}
707

    
708
}
    (1-1/1)