Project

General

Profile

1
package eu.dnetlib.data.transform;
2

    
3
import java.io.*;
4
import java.util.*;
5
import java.util.Map.Entry;
6
import java.util.stream.Collectors;
7
import java.util.zip.GZIPInputStream;
8
import javax.xml.transform.TransformerConfigurationException;
9
import javax.xml.transform.TransformerFactoryConfigurationError;
10

    
11
import com.google.common.collect.Lists;
12
import com.google.common.collect.Maps;
13
import com.google.common.collect.Sets;
14
import com.google.gson.JsonElement;
15
import com.google.gson.JsonParser;
16
import com.google.protobuf.InvalidProtocolBufferException;
17
import eu.dnetlib.data.graph.model.DNGFDecoder;
18
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
19
import eu.dnetlib.data.graph.utils.RelDescriptor;
20
import eu.dnetlib.data.mapreduce.hbase.index.config.EntityConfigTable;
21
import eu.dnetlib.data.mapreduce.hbase.index.config.IndexConfig;
22
import eu.dnetlib.data.mapreduce.hbase.index.config.IndexConfigTest;
23
import eu.dnetlib.data.mapreduce.hbase.index.config.LinkDescriptor;
24
import eu.dnetlib.data.mapreduce.util.UpdateMerger;
25
import eu.dnetlib.data.mapreduce.util.XmlRecordFactory;
26
import eu.dnetlib.data.mapreduce.util.XmlRecordFactoryTest;
27
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
28
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
29
import eu.dnetlib.data.proto.DliFieldTypeProtos;
30
import eu.dnetlib.data.proto.DliProtos;
31
import eu.dnetlib.data.proto.KindProtos.Kind;
32
import eu.dnetlib.data.proto.TypeProtos.Type;
33
import eu.dnetlib.miscutils.functional.xml.XMLIndenter;
34
import eu.dnetlib.pace.clustering.BlacklistAwareClusteringCombiner;
35
import eu.dnetlib.pace.config.DedupConfig;
36
import eu.dnetlib.pace.distance.PaceDocumentDistance;
37
import eu.dnetlib.pace.distance.eval.ScoreResult;
38
import eu.dnetlib.pace.model.MapDocument;
39
import eu.dnetlib.pace.model.ProtoDocumentBuilder;
40
import org.apache.commons.io.IOUtils;
41
import org.apache.commons.lang3.StringUtils;
42
import org.apache.commons.logging.Log;
43
import org.apache.commons.logging.LogFactory;
44
import org.dom4j.Document;
45
import org.dom4j.DocumentException;
46
import org.dom4j.io.SAXReader;
47
import org.junit.Before;
48
import org.junit.Test;
49

    
50
import static org.junit.Assert.*;
51

    
52
public class XsltRowTransformerFactoryTest extends AbstractTransformerTest {
53

    
54
	private static final Log log = LogFactory.getLog(XsltRowTransformerFactoryTest.class);
55

    
56

    
57
	private EntityConfigTable entityConfigTable;
58

    
59
	@Before
60
	public void setUp() throws Exception {
61
		factory = new XsltRowTransformerFactory();
62
		entityConfigTable = IndexConfig.load(IndexConfigTest.loadConfiguration()).getConfigMap();
63
	}
64

    
65
	@Test
66
	public void testParseManyAuthors() throws Exception {
67

    
68
		final Map<String, Object> xslParams = Maps.newHashMap();
69

    
70
		final Map<String, String> m = Maps.newHashMap();
71

    
72
		m.put("od______2367", "true"); // Puma
73
		m.put("od______2294", "true"); // UNIBI
74
		m.put("od________18", "false"); // Arxiv
75

    
76
		xslParams.put("mergeIdForHomonymsMap", m);
77

    
78
		final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordManyAuthors.xml"));
79
		int authorCount = 0;
80
		for (final Row row : rows) {
81
			for (final Column<String, byte[]> col : row.getColumns()) {
82

    
83
				authorCount++;
84
				final DNGFDecoder d = DNGFDecoder.decode(col.getValue());
85
				assertNotNull(d);
86

    
87
				log.debug(d.getEntity().getPerson().getCoauthorList().size());
88
			}
89
		}
90
		log.info("authors' count: ---> " + authorCount);
91

    
92
	}
93

    
94
	@Test
95
	public void testParseAuthors() throws Exception {
96

    
97
		final Map<String, Object> xslParams = Maps.newHashMap();
98

    
99
		final Map<String, String> m = Maps.newHashMap();
100

    
101
		m.put("od______2367", "true"); // Puma
102
		m.put("od______2294", "true"); // UNIBI
103
		m.put("od________18", "false"); // Arxiv
104

    
105
		xslParams.put("mergeIdForHomonymsMap", m);
106

    
107
		final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordArxiv.xml"));
108

    
109
		for (final Row row : rows) {
110
			for (final Column<String, byte[]> col : row.getColumns()) {
111

    
112
				final DNGFDecoder d = DNGFDecoder.decode(col.getValue());
113
				log.debug(d.getDNGF());
114
			}
115
		}
116
	}
117

    
118
	@Test
119
	public void testParseDNGFClaimUpdate() throws Exception {
120
		doTest(loadFromTransformationProfile("claimUpdates_2_hbase.xml"), load("recordClaimUpdate.xml"));
121
	}
122

    
123
	@Test
124
	public void testParseClaimUpdate() throws Exception {
125

    
126
		final List<Row> rows = Lists.newArrayList();
127
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), load("recordClaim.xml")));
128
		rows.addAll(asRows(loadFromTransformationProfile("claimUpdates_2_hbase.xml"), load("recordClaimUpdate.xml")));
129

    
130
		printAll(mapAll(buildTable(rows)));
131
	}
132

    
133
	@Test
134
	public void testParseDatasetPUB() throws Exception {
135

    
136
		doTest(loadFromTransformationProfile("dmf2hbase.xml"), load("recordDatasetPUB.xml"));
137
	}
138

    
139
	@Test
140
	public void testParseClaim() throws Exception {
141

    
142
		doTest(loadFromTransformationProfile("pmf2hbase.xml"), load("recordClaim.xml"));
143
	}
144

    
145
	@Test
146
	public void testParseClaimDataset() throws Exception {
147

    
148
		doTest(loadFromTransformationProfile("dmf2hbase.xml"), load("recordClaimDataset.xml"));
149
	}
150

    
151
	@Test
152
	public void testParseACM() throws Exception {
153

    
154
		doTest(loadFromTransformationProfile("pmf2hbase.xml"), load("recordACM.xml"));
155
	}
156

    
157
	@Test
158
	public void testParseASB() throws Exception {
159

    
160
		doTest(loadFromTransformationProfile("pmf2hbase.xml"), load("recordASB.xml"));
161
	}
162

    
163
	@Test
164
	public void testParseProjectCorda() throws Exception {
165

    
166
		doTest(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordCorda.xml"));
167
	}
168

    
169
	@Test
170
	public void testParseProjectFCT() throws Exception {
171

    
172
		doTest(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordFCT.xml"));
173
	}
174

    
175
	@Test
176
	public void testParseDNGF() throws Exception {
177

    
178
		doTest(loadFromTransformationProfile("pmf2hbase.xml"), load("record.xml"));
179
	}
180

    
181
	@Test
182
	public void testParseDNGFPublication() throws Exception {
183

    
184
		doTest(loadFromTransformationProfile("oaf_publication2hbase.xml"), load("record.xml"));
185
	}
186

    
187
	@Test
188
	public void testParseWDSDaaaatacite() throws Exception {
189

    
190
        //doTest(loadFromTransformationProfile("dmfwds2hbase.xml"), load("recordWds.xml"));
191
    }
192

    
193

    
194
	@Test
195
	public void testParseDLIDatacite() throws Exception {
196

    
197
        final List<Row> rows = Lists.newArrayList();
198
        rows.addAll(asRows(loadFromTransformationProfile("dmfdli2hbase.xml"), load("record_dli_dmf2.xml")));
199
        rows.forEach(row ->
200
                row.getColumns().forEach(
201
						result -> {
202
							if (result != null) {
203
								final DNGFDecoder decoder =
204
										DNGFDecoder.decode(result.getValue(), DliFieldTypeProtos.completionStatus, DliProtos.completionStatus, DliProtos.resolvedfrom, DliProtos.typedIdentifier);
205

    
206
							System.out.println(result.getName());
207
							System.out.println("decoder.getDNGF().toString() = " + decoder.getDNGF().toString());
208
							}
209
						}
210
				));
211
	}
212

    
213
	@Test
214
	public void testParseDLIPMF() throws Exception {
215

    
216
		doTest(loadFromTransformationProfile("pmfdli2hbase.xml"), load("record_dli_pmf.xml"));
217

    
218
		final List<Row> rows = Lists.newArrayList();
219
		rows.addAll(asRows(loadFromTransformationProfile("pmfdli2hbase.xml"), load("record_dli_pmf.xml")));
220

    
221

    
222
		rows.forEach(row -> {
223
			System.out.println(row.getKey().toString());
224
			row.getColumns().forEach(result -> {
225
				System.out.println(result.getName());
226
						if (result != null) {
227
							final DNGFDecoder decoder =
228
									DNGFDecoder.decode(result.getValue(), DliFieldTypeProtos.completionStatus, DliProtos.completionStatus, DliProtos.resolvedfrom, DliProtos.typedIdentifier);
229

    
230
							System.out.println("decoder.getDNGF().toString() = " + decoder.getDNGF().toString());
231
						}
232
					}
233
			);
234
		});
235
	}
236

    
237
	@Test
238
	public void testParseDatacite() throws Exception {
239

    
240
		doTest(loadFromTransformationProfile("dmf2hbase.xml"), load("recordDatacite.xml"));
241
	}
242

    
243
	@Test
244
	public void testParseDatacite2() throws Exception {
245

    
246
		doTest(loadFromTransformationProfile("dmf2hbase.xml"), load("recordDatacite2.xml"));
247
	}
248

    
249
	@Test
250
	public void testParseOpenTrials() throws Exception {
251

    
252
		doTest(loadFromTransformationProfile("dmf2hbase.xml"), load("opentrials_datacite1.xml"));
253
	}
254

    
255

    
256
	@Test
257
	public void testLinkPangaea() throws Exception {
258

    
259
		final List<Row> rows = Lists.newArrayList();
260
		rows.addAll(asRows(loadFromTransformationProfile("dmf2hbase.xml"), load("pangaeODF1.xml")));
261
		rows.addAll(asRows(loadFromTransformationProfile("dmf2hbase.xml"), load("pangaeODF2.xml")));
262
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), load("pangaeOAF.xml")));
263
		rows.addAll(asRows(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordCordaPangaea.xml")));
264

    
265
		printAll(mapAll(buildTable(rows)));
266
	}
267

    
268
	@Test
269
	public void testLinkCorda() throws Exception {
270

    
271
		final List<Row> rows = Lists.newArrayList();
272
		rows.addAll(asRows(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordCorda.xml")));
273
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), load("recordCorda.xml")));
274

    
275
		printAll(mapAll(buildTable(rows)));
276
	}
277

    
278
	@Test
279
	public void testLinkFCT() throws Exception {
280

    
281
		final List<Row> rows = Lists.newArrayList();
282
		rows.addAll(asRows(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordFCT.xml")));
283
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), load("recordFCT.xml")));
284

    
285
		printAll(mapAll(buildTable(rows)));
286
	}
287

    
288
	@Test
289
	public void testLinkARC() throws Exception {
290

    
291
		final List<Row> rows = Lists.newArrayList();
292
		rows.addAll(asRows(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordARC.xml")));
293
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), load("recordARC.xml")));
294

    
295
		printAll(mapAll(buildTable(rows)));
296
	}
297

    
298
	@Test
299
	public void testLinkWT() throws Exception {
300

    
301
		final List<Row> rows = Lists.newArrayList();
302
		rows.addAll(asRows(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordWT.xml")));
303
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), load("recordWT.xml")));
304

    
305
		printAll(mapAll(buildTable(rows)));
306
	}
307

    
308
	@Test
309
	public void testLinkOrganization() throws Exception {
310

    
311
		final List<Row> rows = Lists.newArrayList();
312
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xml"), load("organization.xml")));
313
		rows.addAll(asRows(loadFromTransformationProfile("projectorganization_2_hbase.xml"), load("project_organization.xml")));
314
		rows.addAll(asRows(loadFromTransformationProfile("project_2_hbase.xml"), load("projectRecordCorda.xml")));
315

    
316
		printAll(mapAll(buildTable(rows)));
317
	}
318

    
319
	@Test
320
	public void testUnpackAuthors() throws Exception {
321

    
322
		final Map<String, Object> xslParams = Maps.newHashMap();
323

    
324
		final Map<String, String> m = Maps.newHashMap();
325

    
326
		m.put("od______2367", "true"); // Puma
327
		m.put("od______2294", "true"); // UNIBI
328
		m.put("od________18", "false"); // Arxiv
329

    
330
		xslParams.put("mergeIdForHomonymsMap", m);
331

    
332
		final List<Row> rows = Lists.newArrayList();
333
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), xslParams, load("recordArxiv.xml")));
334
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), xslParams, load("recordArxiv2.xml")));
335
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), xslParams, load("recordPuma1.xml")));
336
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), xslParams, load("recordPuma2.xml")));
337
		rows.addAll(asRows(loadFromTransformationProfile("pmf2hbase.xml"), xslParams, load("recordUNIBI.xml")));
338

    
339
		printPersonFullnames(mapAll(buildTable(rows)));
340
	}
341

    
342
	@Test
343
	public void testParseDoajOAF() throws Exception {
344

    
345
		doTest(loadFromTransformationProfile("pmf2hbase.xml"), load("doajUniversityRecord.xml"));
346
	}
347

    
348
	@Test
349
	public void testParseDatasource() throws Exception {
350

    
351
		doTest(loadFromTransformationProfile("datasources_2_hbase.xml"), load("datasourceNative.xml"));
352
	}
353

    
354
	@Test
355
	public void testParseDataDatasource() throws Exception {
356

    
357
		doTest(loadFromTransformationProfile("datasources_2_hbase.xml"), load("dataDatasource.xml"));
358
	}
359

    
360
	@Test
361
	public void testFromMongodbCompressedDump() throws Exception {
362
		doTestJsonGz(loadFromTransformationProfile("pmf2hbase.xml"), load("mdstore_cleaned.json.gz"));
363
	}
364

    
365
	@Test
366
	public void testLoadFromTransformationProfile() throws IOException {
367
		InputStream in = loadFromTransformationProfile("pmf2hbase.xml");
368
		log.info(IOUtils.toString(in));
369
	}
370

    
371
	@Test
372
	public void test_template() throws Exception {
373
		final String xslt = IOUtils.toString(loadFromTransformationProfile("pmf2hbase.xml"));
374
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
375
		assertNotNull(transformer);
376

    
377
		final String record = IOUtils.toString(load("record.xml"));
378
		final List<Row> rows = transformer.apply(record);
379

    
380
		System.out.println(rows);
381
	}
382

    
383
	@Test
384
	public void testWrongCharsOrganization() throws Exception {
385
		final List<Row> rows = Lists.newArrayList();
386
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xml"), load("organizationWrongChars.xml")));
387
		printAll(mapAll(buildTable(rows)));
388
	}
389

    
390

    
391

    
392
	@Test
393
	public void testMapDLIDataciteAsMapDocument() throws Exception {
394

    
395
		final List<Row> rows = Lists.newArrayList();
396
		rows.addAll(asRows(loadFromTransformationProfile("dmfdli2hbase.xml"), load("record_dli_dmf.xml")));
397

    
398
		final List<Optional<DNGF>> collect = rows.stream().map(row -> row.getColumns().stream().map(
399
				result -> {
400
					final DNGFDecoder decoder =
401
							DNGFDecoder.decode(result.getValue(), DliFieldTypeProtos.completionStatus, DliProtos.completionStatus, DliProtos.resolvedfrom,
402
									DliProtos.typedIdentifier);
403
					return decoder.getDNGF();
404
				}
405
		).filter(dngf -> dngf.getKind().equals(Kind.entity) && dngf.getEntity().getType().equals(Type.dataset))
406
				.findFirst())
407
				.filter(Optional::isPresent)
408
				.collect(Collectors.toList());
409

    
410
		collect.forEach(c -> {
411
			System.out.println(c.get().toString());
412
		});
413

    
414
		final DNGFEntity entity = collect.get(0).get().getEntity();
415

    
416
		final DedupConfig conf = DedupConfig.load(IOUtils.toString(load("/eu/dnetlib/pace/dli.pace.dataset.json")));
417

    
418
		final MapDocument d1 = ProtoDocumentBuilder.newInstance(entity.getId(), entity, conf.getPace().getModel());
419

    
420
		final ScoreResult sr = new PaceDocumentDistance().between(d1, d1, conf);
421
		final double d = sr.getScore();
422
		System.out.println(String.format(" d ---> %s", d));
423

    
424
		BlacklistAwareClusteringCombiner.filterAndCombine(d1, conf, conf.getPace().getBlacklists());
425

    
426
		System.out.println(d1.toString());
427

    
428

    
429
	}
430

    
431

    
432
	private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception {
433
		try {
434
			final List<Row> rows = asRows(xsltStream, recordStream);
435

    
436
			log.info(rows);
437

    
438
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTable(rows);
439

    
440
			//System.out.println("\n" + table.toString());
441

    
442
			final Map<String, XmlRecordFactory> builders = mapAll(table);
443

    
444
			printAll(builders);
445
		} catch (final InvalidProtocolBufferException e) {
446
			throw new Exception(e);
447
		} catch (final TransformerConfigurationException e) {
448
			throw new Exception(e);
449
		} catch (final TransformerFactoryConfigurationError e) {
450
			throw new Exception(e);
451
		} catch (final DocumentException e) {
452
			throw new Exception(e);
453
		}
454
	}
455

    
456
	private void doTestJsonGz(final InputStream xsltStream, final InputStream recordStream) throws Exception {
457

    
458
		final Iterator<List<Row>> rowsIterator = asRowsJsonGzip(xsltStream, recordStream);
459

    
460
		int i = 0;
461
		while (rowsIterator.hasNext()) {
462
			final List<Row> rows = rowsIterator.next();
463
			i++;
464

    
465
			if ((i % 10000) == 0) {
466
				System.out.println(i);
467
			}
468

    
469
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTableDoaj(rows);
470

    
471
			for (final Map<String, Map<String, byte[]>> m : table.values()) {
472
				for (final Map<String, byte[]> mv : m.values()) {
473
					for (final byte[] v : mv.values()) {
474
						final DNGFDecoder d = DNGFDecoder.decode(v);
475
						assertNotNull(d);
476
						assertNotNull(d.getDNGF());
477

    
478
						switch (d.getKind()) {
479
						case entity:
480
							assertNotNull(d.getMetadata());
481
							final Type type = d.getDNGF().getEntity().getType();
482
							if (type.equals(Type.publication) || type.equals(Type.dataset)) {
483
								System.out.println(d.getDNGF());
484
							}
485
							break;
486
						case relation:
487
							assertNotNull(d.getDNGFRel());
488
							break;
489
						default:
490
							break;
491
						}
492
					}
493
				}
494
			}
495
		}
496
	}
497

    
498

    
499
	private Iterator<List<Row>> asRowsJsonGzip(final InputStream xsltStream, final InputStream recordStreamJsonGzip) throws Exception {
500
		final String xslt = IOUtils.toString(xsltStream);
501
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
502
		assertNotNull(transformer);
503
		assertNotNull(recordStreamJsonGzip);
504

    
505
		final GZIPInputStream stream = new GZIPInputStream(recordStreamJsonGzip);
506
		assertNotNull(stream);
507
		final BufferedReader inStream = new BufferedReader(new InputStreamReader(stream));
508
		assertNotNull(inStream);
509
		return new Iterator<List<Row>>() {
510

    
511
			String jsonRecord = null;
512

    
513
			@Override
514
			public boolean hasNext() {
515
				try {
516
					return (jsonRecord = inStream.readLine()) != null;
517
				} catch (final IOException e) {
518
					throw new RuntimeException(e);
519
				}
520
			}
521

    
522
			@Override
523
			public List<Row> next() {
524

    
525
				final JsonElement json = new JsonParser().parse(jsonRecord);
526
				final String body = json.getAsJsonObject().get("body").getAsString();
527
				try {
528
					assertTrue(StringUtils.isNotBlank(body));
529
					// System.out.println(body);
530
					final List<Row> rows = transformer.apply(body);
531
					assertNotNull(rows);
532
					assertFalse(rows.isEmpty());
533
					return rows;
534
				} catch (final Throwable e) {
535
					System.err.println("error transforming document: " + body);
536
					throw new RuntimeException(e);
537
				}
538
			}
539

    
540
			@Override
541
			public void remove() {
542
				throw new UnsupportedOperationException();
543
			}
544

    
545
		};
546

    
547
	}
548

    
549
	private Map<String, Map<String, Map<String, byte[]>>> buildTableDoaj(final List<Row> rows) throws UnsupportedEncodingException {
550
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
551

    
552
		for (final Row row : rows) {
553
			final String rowKey = row.getKey();
554
			final String cf = row.getColumnFamily();
555
			if (!table.containsKey(rowKey)) {
556
				table.put(rowKey, new HashMap<>());
557
			}
558
			if (!table.get(rowKey).containsKey(cf)) {
559
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<>());
560
			}
561
			for (final Column<String, byte[]> c : row.getColumns()) {
562
				// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
563
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
564
				if (cf.equals("result") && c.getName().equals("body")) {
565
					// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
566
					assertTrue(StringUtils.isNotBlank(new String(c.getValue(), "UTF-8")));
567
				}
568
			}
569
		}
570
		return table;
571

    
572
	}
573

    
574

    
575

    
576
	private Map<String, XmlRecordFactory> mapAll(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
577

    
578
		final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
579
		for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
580
			map(builders, e.getKey(), e.getValue());
581
		}
582
		return builders;
583
	}
584

    
585
	// private Map<String, XmlRecordFactory> mapResultsOnly(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
586
	//
587
	// final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
588
	// for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
589
	// final Type type = DNGFRowKeyDecoder.decode(e.getKey()).getType();
590
	// if (type == Type.result) {
591
	// map(builders, e.getKey(), e.getValue());
592
	// }
593
	// }
594
	// return builders;
595
	// }
596

    
597
	private void map(final Map<String, XmlRecordFactory> builders, final String rowKey, final Map<String, Map<String, byte[]>> row) throws Exception {
598

    
599
		final Type type = DNGFRowKeyDecoder.decode(rowKey).getType();
600

    
601
		final Map<String, byte[]> familyMap = row.get(type.toString());
602

    
603
		if (familyMap == null) return;
604

    
605
		final byte[] bodyB = familyMap.get("body");
606

    
607
		if (bodyB != null) {
608
			ensureBuilder(builders, rowKey);
609

    
610
			final DNGF oaf = UpdateMerger.mergeBodyUpdates(familyMap, type);
611

    
612
			final DNGFDecoder mainEntity = DNGFDecoder.decode(oaf);
613

    
614
			builders.get(rowKey).setMainEntity(mainEntity);
615

    
616
			for (final LinkDescriptor ld : entityConfigTable.getDescriptors(type)) {
617

    
618
				final String it = ld.getRelDescriptor().getCode();
619
				final Map<String, byte[]> cols = row.get(it);
620

    
621
				if ((cols != null) && !cols.isEmpty()) {
622

    
623
					for (final byte[] oafB : cols.values()) {
624

    
625
						final DNGF.Builder relBuilder = DNGF.newBuilder(DNGF.parseFrom(oafB));
626

    
627
						if (ld.isSymmetric()) {
628
							final RelDescriptor rd = ld.getRelDescriptor();
629

    
630
							relBuilder.getRelBuilder().setCachedTarget(mainEntity.getEntity()).setRelType(rd.asQualifier());
631
						}
632

    
633
						relBuilder.getRelBuilder().setChild(ld.isChild());
634

    
635
						final DNGF.Builder oafBuilder = DNGF.newBuilder().setKind(Kind.relation).setLastupdatetimestamp(System.currentTimeMillis());
636
						oafBuilder.mergeFrom(relBuilder.build());
637

    
638
						final String targetId = ld.isSymmetric() ? oafBuilder.getRel().getTarget() : oafBuilder.getRel().getSource();
639
						ensureBuilder(builders, targetId);
640
						final DNGFDecoder decoder = DNGFDecoder.decode(oafBuilder.build());
641

    
642
						if (ld.isChild()) {
643
							builders.get(targetId).addChild(type, decoder);
644
						} else {
645
							builders.get(targetId).addRelation(type, decoder);
646
						}
647
					}
648

    
649
				}
650
			}
651
		}
652

    
653
	}
654

    
655
	private void printAll(final Map<String, XmlRecordFactory> builders) throws DocumentException {
656
		print(Sets.newHashSet(Type.values()), builders, null);
657
	}
658

    
659
	private void printPerson(final Map<String, XmlRecordFactory> builders) throws DocumentException {
660
		print(Sets.newHashSet(Type.person), builders, null);
661
	}
662

    
663
	private void printPersonFullnames(final Map<String, XmlRecordFactory> builders) throws DocumentException {
664

    
665
		final Map<Type, Set<String>> xpaths = Maps.newHashMap();
666

    
667
		final Set<String> personPaths = Sets.newHashSet();
668

    
669
		personPaths.add("//fullname");
670

    
671
		xpaths.put(Type.person, personPaths);
672

    
673
		print(Sets.newHashSet(Type.person), builders, xpaths);
674
	}
675

    
676
	private void print(final Set<Type> types, final Map<String, XmlRecordFactory> builders, final Map<Type, Set<String>> xpaths) throws DocumentException {
677
		final SAXReader r = new SAXReader();
678

    
679
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
680
			final DNGFRowKeyDecoder kd = DNGFRowKeyDecoder.decode(e.getKey());
681

    
682
			if (!e.getValue().isValid()) throw new IllegalArgumentException("invalid builder: " + e.getKey());
683
			if (types.contains(kd.getType())) {
684
				final String val = XMLIndenter.indent(e.getValue().build());
685

    
686
				if ((xpaths != null) && !xpaths.isEmpty() && (xpaths.get(kd.getType()) != null)) {
687
					final Document doc = r.read(new StringReader(val));
688

    
689
					log.debug("\n" + e.getKey());
690
					for (final String xpath : xpaths.get(kd.getType())) {
691
						log.debug(doc.valueOf(xpath));
692
					}
693
				} else {
694

    
695
					log.info(val);
696
				}
697
			}
698
		}
699
	}
700

    
701
	private void printNoIndent(final Map<String, XmlRecordFactory> builders) {
702
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
703
			if (e.getValue().isValid()) {
704
				log.debug(e.getValue().build());
705
			} else {
706
				log.debug("invalid builder: " + e.getKey());
707
			}
708
		}
709
	}
710

    
711
	private void ensureBuilder(final Map<String, XmlRecordFactory> builders, final String rowKey) throws Exception {
712
		if (!builders.containsKey(rowKey)) {
713
			builders.put(rowKey, newBuilder());
714
		}
715
	}
716

    
717
	private XmlRecordFactory newBuilder() throws TransformerConfigurationException, TransformerFactoryConfigurationError, DocumentException, IOException {
718
		return new XmlRecordFactory(entityConfigTable, IndexConfigTest.loadContextFromCp(), OntologyLoader.loadOntologiesFromCp(),
719
				XmlRecordFactoryTest.SCHEMA_LOCATION, true, true, false);
720
	}
721

    
722

    
723
}
(5-5/5)