Project

General

Profile

1
package eu.dnetlib.data.transform;
2

    
3
import java.io.*;
4
import java.util.*;
5
import java.util.Map.Entry;
6
import java.util.zip.GZIPInputStream;
7
import javax.xml.transform.TransformerConfigurationException;
8
import javax.xml.transform.TransformerFactoryConfigurationError;
9

    
10
import com.google.common.base.Function;
11
import com.google.common.collect.Iterables;
12
import com.google.common.collect.Lists;
13
import com.google.common.collect.Maps;
14
import com.google.common.collect.Sets;
15
import com.google.protobuf.InvalidProtocolBufferException;
16
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
17
import eu.dnetlib.data.mapreduce.util.*;
18
import eu.dnetlib.data.proto.KindProtos.Kind;
19
import eu.dnetlib.data.proto.OafProtos.Oaf;
20
import eu.dnetlib.data.proto.TypeProtos.Type;
21
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
22
import org.apache.commons.io.IOUtils;
23
import org.apache.commons.lang.StringUtils;
24
import org.apache.commons.logging.Log;
25
import org.apache.commons.logging.LogFactory;
26
import org.dom4j.Document;
27
import org.dom4j.DocumentException;
28
import org.dom4j.io.SAXReader;
29
import org.json.JSONObject;
30
import org.junit.Before;
31
import org.junit.Ignore;
32
import org.junit.Test;
33

    
34
import static org.junit.Assert.*;
35

    
36
public class XsltRowTransformerFactoryTest {
37

    
38
	private static final Log log = LogFactory.getLog(XsltRowTransformerFactoryTest.class);
39
	private static String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/";
40
	private XsltRowTransformerFactory factory;
41
	private EntityConfigTable entityConfigTable;
42

    
43
	@Before
44
	public void setUp() throws Exception {
45
		factory = new XsltRowTransformerFactory();
46
		entityConfigTable = IndexConfig.load(IndexConfigTest.config).getConfigMap();
47
	}
48

    
49
	@Test
50
	public void testParseManyAuthors() throws Exception {
51

    
52
		final Map<String, Object> xslParams = Maps.newHashMap();
53

    
54
		final Map<String, String> m = Maps.newHashMap();
55

    
56
		m.put("od______2367", "true"); // Puma
57
		m.put("od______2294", "true"); // UNIBI
58
		m.put("od________18", "false"); // Arxiv
59

    
60
		xslParams.put("mergeIdForHomonymsMap", m);
61

    
62
		final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordManyAuthors.xml"));
63
		int authorCount = 0;
64
		for (final Row row : rows) {
65
			for (final Column<String, byte[]> col : row.getColumns()) {
66

    
67
				authorCount++;
68
				final OafDecoder d = OafDecoder.decode(col.getValue());
69
				assertNotNull(d);
70

    
71
				log.debug(d.getEntity().getPerson().getCoauthorList().size());
72
			}
73
		}
74
		log.info("authors' count: ---> " + authorCount);
75

    
76
	}
77

    
78
	@Test
79
	public void testParseAuthors() throws Exception {
80

    
81
		final Map<String, Object> xslParams = Maps.newHashMap();
82

    
83
		final Map<String, String> m = Maps.newHashMap();
84

    
85
		m.put("od______2367", "true"); // Puma
86
		m.put("od______2294", "true"); // UNIBI
87
		m.put("od________18", "false"); // Arxiv
88

    
89
		xslParams.put("mergeIdForHomonymsMap", m);
90

    
91
		final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordArxiv.xml"));
92

    
93
		for (final Row row : rows) {
94
			for (final Column<String, byte[]> col : row.getColumns()) {
95

    
96
				final OafDecoder d = OafDecoder.decode(col.getValue());
97
				log.debug(d.getOaf());
98
			}
99
		}
100
	}
101

    
102
	@Test
103
	@Ignore // need to reimplement because claimUpdates_2_hbase.xsl was removed
104
	public void testParseOafClaimUpdate() throws Exception {
105
		doTest(loadFromTransformationProfile("claimUpdates_2_hbase.xsl"), load("recordClaimUpdate.xml"));
106
	}
107

    
108
	@Test
109
	@Ignore // need to reimplement because claimUpdates_2_hbase.xsl was removed
110
	public void testParseClaimUpdate() throws Exception {
111

    
112
		final List<Row> rows = Lists.newArrayList();
113
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaim.xml")));
114
		rows.addAll(asRows(loadFromTransformationProfile("claimUpdates_2_hbase.xsl"), load("recordClaimUpdate.xml")));
115

    
116
		printAll(mapAll(buildTable(rows)));
117
	}
118

    
119
	@Test
120
	public void testParseDatasetPUB() throws Exception {
121

    
122
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatasetPUB.xml"));
123
	}
124

    
125
	@Test
126
	public void testParseDatasetLindat() throws Exception {
127

    
128
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("datasetLindat.xml"));
129
	}
130

    
131
	@Test
132
	public void testParseClaim() throws Exception {
133

    
134
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaim.xml"));
135
	}
136

    
137
	@Test
138
	public void testParseClaimDataset() throws Exception {
139

    
140
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordClaimDataset.xml"));
141
	}
142

    
143
	@Test
144
	public void testParseACM() throws Exception {
145

    
146
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordACM.xml"));
147
	}
148

    
149
	@Test
150
	public void testParseASB() throws Exception {
151

    
152
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordASB.xml"));
153
	}
154

    
155
	@Test
156
	public void testParseProjectCorda() throws Exception {
157

    
158
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml"));
159
	}
160

    
161
	@Test
162
	public void testParseProjectFCT() throws Exception {
163

    
164
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordFCT.xml"));
165
	}
166

    
167

    
168
	@Test
169
	public void testParseOaf() throws Exception {
170

    
171
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml"));
172
	}
173

    
174
	@Test
175
	public void testParseOafPublication() throws Exception {
176

    
177
		doTest(loadFromTransformationProfile("oaf_publication2hbase.xml"), load("record.xml"));
178
	}
179

    
180
	@Test
181
	public void testParseLindat() throws Exception {
182

    
183
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordLindat.xml"));
184
	}
185

    
186
	@Test
187
	public void testParseDatacite() throws Exception {
188

    
189
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite.xml"));
190
	}
191

    
192
	@Test
193
	public void testParseDatacite2() throws Exception {
194

    
195
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite2.xml"));
196
	}
197

    
198
	@Test
199
	public void testParseOpenTrials() throws Exception {
200

    
201
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("opentrials_datacite1.xml"));
202
	}
203

    
204
	@Test
205
	public void testLinkPangaea() throws Exception {
206

    
207
		final List<Row> rows = Lists.newArrayList();
208
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("pangaeODF1.xml")));
209
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("pangaeODF2.xml")));
210
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("pangaeOAF.xml")));
211
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCordaPangaea.xml")));
212

    
213
		printAll(mapAll(buildTable(rows)));
214
	}
215

    
216
	@Test
217
	public void testPangaea() throws Exception {
218

    
219
		final List<Row> rows = Lists.newArrayList();
220
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("pangaeOAF2.xml")));
221
		printAll(mapAll(buildTable(rows)));
222
	}
223
	@Test
224
	public void testZenodo() throws Exception {
225

    
226
		final List<Row> rows = Lists.newArrayList();
227
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("zenodoData.xml")));
228
		printAll(mapAll(buildTable(rows)));
229
	}
230

    
231
	@Test
232
	public void testLinkCorda() throws Exception {
233

    
234
		final List<Row> rows = Lists.newArrayList();
235
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
236
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordCorda.xml")));
237

    
238
		printAll(mapAll(buildTable(rows)));
239
	}
240

    
241
	@Test
242
	public void testLinkFCT() throws Exception {
243

    
244
		final List<Row> rows = Lists.newArrayList();
245
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordFCT.xml")));
246
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordFCT.xml")));
247

    
248
		printAll(mapAll(buildTable(rows)));
249
	}
250

    
251
	@Test
252
	public void testLinkARC() throws Exception {
253

    
254
		final List<Row> rows = Lists.newArrayList();
255
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordARC.xml")));
256
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordARC.xml")));
257

    
258
		printAll(mapAll(buildTable(rows)));
259
	}
260

    
261
	@Test
262
	public void testLinkWT() throws Exception {
263

    
264
		final List<Row> rows = Lists.newArrayList();
265
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordWT.xml")));
266
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordWT.xml")));
267

    
268
		printAll(mapAll(buildTable(rows)));
269
	}
270

    
271

    
272

    
273
	@Test
274
	public void testLinkOrganization() throws Exception {
275

    
276
		final List<Row> rows = Lists.newArrayList();
277
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organization.xml")));
278
		rows.addAll(asRows(loadFromTransformationProfile("projectorganization_2_hbase.xsl"), load("project_organization.xml")));
279
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
280

    
281
		printAll(mapAll(buildTable(rows)));
282
	}
283

    
284
	@Test
285
	public void testLinkOrganizationAffiliation() throws Exception {
286

    
287
		final List<Row> rows = Lists.newArrayList();
288
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organization.xml")));
289
		rows.addAll(asRows(loadFromTransformationProfile("resultorganization_2_hbase.xsl"), load("result_organization.xml")));
290
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml")));
291

    
292
		printAll(mapAll(buildTable(rows)));
293
	}
294

    
295
	@Test
296
	public void testUnpackAuthors() throws Exception {
297

    
298
		final Map<String, Object> xslParams = Maps.newHashMap();
299

    
300
		final Map<String, String> m = Maps.newHashMap();
301

    
302
		m.put("od______2367", "true"); // Puma
303
		m.put("od______2294", "true"); // UNIBI
304
		m.put("od________18", "false"); // Arxiv
305

    
306
		xslParams.put("mergeIdForHomonymsMap", m);
307

    
308
		final List<Row> rows = Lists.newArrayList();
309
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordArxiv.xml")));
310
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordArxiv2.xml")));
311
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordPuma1.xml")));
312
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordPuma2.xml")));
313
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordUNIBI.xml")));
314

    
315
		printPersonFullnames(mapAll(buildTable(rows)));
316
	}
317

    
318
	@Test
319
	public void testDuplicates() throws Exception {
320
		final String mergeId = "50|dedup_wf_001::08ed625d07e5738b794ff14d6773fd9f";
321
		final List<Row> rows = Lists.newArrayList();
322

    
323
		final Function<Row, Row> f = new Function<Row, Row>() {
324
			@Override
325
			public Row apply(final Row rowIn) {
326

    
327
				final List<Column<String,byte[]>> cols = Lists.newArrayList();
328
				for(Column<String,byte[]> col : rowIn.getColumns()) {
329
					if (col.getName().equals("body")) {
330
						cols.add(new Column(col.getName(), col.getValue()));
331

    
332
					}
333
				}
334
				return new Row("result", rowIn.getKey(), cols);
335
			}
336
		};
337

    
338
		final List<Row> puma1 = asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordPuma1.xml"), f);
339
		puma1.add(new Row("resultResult_dedup_isMergedIn", mergeId));
340

    
341
		final List<Row> puma2 = asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordPuma2.xml"), f);
342
		puma2.add(new Row("resultResult_dedup_isMergedIn", mergeId));
343

    
344
		rows.addAll(puma1);
345
		rows.addAll(puma2);
346

    
347
		List<Oaf> duplicates = Lists.newArrayList();
348
		duplicates.add(getOafBody(puma1));
349
		duplicates.add(getOafBody(puma2));
350
		final Oaf.Builder oafMerge = OafEntityMerger.merge(mergeId, duplicates);
351

    
352
		final Row mergeRow = new Row("result", mergeId, Lists.newArrayList(new Column("body", oafMerge.build().toByteArray())));
353

    
354
		rows.add(mergeRow);
355

    
356
		printAll(mapAll(buildTable(rows)));
357
	}
358

    
359
	private Oaf getOafBody(final List<Row> rows) throws InvalidProtocolBufferException {
360
		for(Row row : rows) {
361
			if(StringUtils.startsWith(row.getKey(), "50")) {
362
				return Oaf.parseFrom(row.getColumn("body").getValue());
363

    
364
			}
365
		}
366
		return null;
367
	}
368

    
369
	@Test
370
	public void testParseDoajOAF() throws Exception {
371

    
372
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("doajUniversityRecord.xml"));
373
	}
374

    
375
	@Test
376
	public void testParseDatasource() throws Exception {
377

    
378
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("datasourceNative.xml"));
379
	}
380
	@Test
381
	public void testParseDatasourcePiwik() throws Exception {
382

    
383
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("datasourcePiwik.xml"));
384
	}
385

    
386
	@Test
387
	public void testParseDataDatasource() throws Exception {
388

    
389
		doTest(loadFromTransformationProfile("datasources_2_hbase.xsl"), load("dataDatasource.xml"));
390
	}
391

    
392
	@Test
393
	public void testFromMongodbCompressedDump() throws Exception {
394
		doTestJsonGz(loadFromTransformationProfile("oaf2hbase.xml"), load("mdstore_cleaned.json.gz"));
395
	}
396

    
397
	@Test
398
	public void testLoadFromTransformationProfile() throws IOException {
399
		InputStream in = loadFromTransformationProfile("oaf2hbase.xml");
400
		log.info(IOUtils.toString(in));
401
	}
402

    
403
	@Test
404
	public void test_template() throws Exception {
405
		final String xslt = IOUtils.toString(loadFromTransformationProfile("oaf2hbase.xml"));
406
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
407
		assertNotNull(transformer);
408

    
409
		final String record = IOUtils.toString(load("record.xml"));
410
		final List<Row> rows = transformer.apply(record);
411

    
412
		System.out.println(rows);
413
	}
414

    
415
	@Test
416
	public void testWrongCharsOrganization() throws Exception {
417
		final List<Row> rows = Lists.newArrayList();
418
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organizationWrongChars.xml")));
419
		printAll(mapAll(buildTable(rows)));
420
	}
421

    
422
	@Test
423
	public void testParseProjectWithFunderOriginalName() throws Exception {
424

    
425
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectWithFunderOriginalName.xml"));
426
	}
427
	@Test
428
	public void testLinkFunderOriginalName() throws Exception {
429

    
430
		final List<Row> rows = Lists.newArrayList();
431
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectWithFunderOriginalName.xml")));
432
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordFunderOriginalName.xml")));
433

    
434
		printAll(mapAll(buildTable(rows)));
435
	}
436

    
437
	@Test
438
	public void testProjectExtraInfo() throws Exception {
439
		final List<Row> rows = Lists.newArrayList();
440
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordExtraInfo.xml")));
441
		printAll(mapAll(buildTable(rows)));
442
	}
443

    
444
	@Test
445
	public void testParseSoftwareFromODF() throws Exception {
446
		final List<Row> rows = Lists.newArrayList();
447
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("softwareODF.xml")));
448
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
449
		printAll(mapAll(buildTable(rows)));
450
	}
451

    
452
	@Test(expected = AssertionError.class)
453
	public void testParseSoftwareFromOAF() throws Exception {
454
		final List<Row> rows = Lists.newArrayList();
455
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordOAFsoftware.xml")));
456
		printAll(mapAll(buildTable(rows)));
457
	}
458

    
459
	private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception {
460
		try {
461
			final List<Row> rows = asRows(xsltStream, recordStream);
462

    
463
			log.info(rows);
464

    
465
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTable(rows);
466

    
467
			// System.out.println("\n" + table.toString());
468

    
469
			final Map<String, XmlRecordFactory> builders = mapAll(table);
470

    
471
			printAll(builders);
472
		} catch (final InvalidProtocolBufferException e) {
473
			throw new Exception(e);
474
		} catch (final TransformerConfigurationException e) {
475
			throw new Exception(e);
476
		} catch (final TransformerFactoryConfigurationError e) {
477
			throw new Exception(e);
478
		} catch (final DocumentException e) {
479
			throw new Exception(e);
480
		}
481
	}
482

    
483
	private void doTestJsonGz(final InputStream xsltStream, final InputStream recordStream) throws Exception {
484

    
485
		final Iterator<List<Row>> rowsIterator = asRowsJsonGzip(xsltStream, recordStream);
486

    
487
		int i = 0;
488
		while (rowsIterator.hasNext()) {
489
			final List<Row> rows = rowsIterator.next();
490
			i++;
491

    
492
			if ((i % 10000) == 0) {
493
				System.out.println(i);
494
			}
495

    
496
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTableDoaj(rows);
497

    
498
			for (final Map<String, Map<String, byte[]>> m : table.values()) {
499
				for (final Map<String, byte[]> mv : m.values()) {
500
					for (final byte[] v : mv.values()) {
501
						final OafDecoder d = OafDecoder.decode(v);
502
						assertNotNull(d);
503
						assertNotNull(d.getOaf());
504

    
505
						switch (d.getKind()) {
506
						case entity:
507
							assertNotNull(d.getMetadata());
508
							if (d.getOaf().getEntity().getType().equals(Type.result)) {
509
								System.out.println(d.getOaf());
510
							}
511
							break;
512
						case relation:
513
							assertNotNull(d.getRel());
514
							break;
515
						default:
516
							break;
517
						}
518
					}
519
				}
520
			}
521
		}
522
	}
523

    
524
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream, final Function<Row, Row> p) throws Exception {
525
		return asRows(xsltStream, new HashMap<>(), recordStream, p);
526
	}
527

    
528
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream) throws Exception {
529
		return asRows(xsltStream, new HashMap<>(), recordStream);
530
	}
531

    
532
	private List<Row> asRows(final InputStream xsltStream, final Map<String, Object> params, final InputStream recordStream) throws Exception {
533
		return asRows(xsltStream, params, recordStream, null);
534
	}
535

    
536
	private List<Row> asRows(final InputStream xsltStream, final Map<String, Object> params, final InputStream recordStream, final Function<Row, Row> p) throws Exception {
537
		final String xslt = IOUtils.toString(xsltStream);
538
		final XsltRowTransformer transformer = factory.getTransformer(xslt, params);
539
		assertNotNull(transformer);
540

    
541
		final String record = IOUtils.toString(recordStream);
542
		final List<Row> rows = transformer.apply(record);
543

    
544
		assertNotNull(rows);
545
		assertFalse(rows.isEmpty());
546
		return p == null ? rows : Lists.newArrayList(Iterables.transform(rows, p));
547
	}
548

    
549
	private Iterator<List<Row>> asRowsJsonGzip(final InputStream xsltStream, final InputStream recordStreamJsonGzip) throws Exception {
550
		final String xslt = IOUtils.toString(xsltStream);
551
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
552
		assertNotNull(transformer);
553
		assertNotNull(recordStreamJsonGzip);
554

    
555
		final GZIPInputStream stream = new GZIPInputStream(recordStreamJsonGzip);
556
		assertNotNull(stream);
557
		final BufferedReader inStream = new BufferedReader(new InputStreamReader(stream));
558
		assertNotNull(inStream);
559
		return new Iterator<List<Row>>() {
560

    
561
			String jsonRecord = null;
562

    
563
			@Override
564
			public boolean hasNext() {
565
				try {
566
					return (jsonRecord = inStream.readLine()) != null;
567
				} catch (final IOException e) {
568
					throw new RuntimeException(e);
569
				}
570
			}
571

    
572
			@Override
573
			public List<Row> next() {
574

    
575
				final JSONObject jsonObj = new JSONObject(jsonRecord);
576
				final String body = jsonObj.getString("body");
577
				try {
578
					assertTrue(StringUtils.isNotBlank(body));
579
					// System.out.println(body);
580
					final List<Row> rows = transformer.apply(body);
581
					assertNotNull(rows);
582
					assertFalse(rows.isEmpty());
583
					return rows;
584
				} catch (final Throwable e) {
585
					System.err.println("error transforming document: " + body);
586
					throw new RuntimeException(e);
587
				}
588
			}
589

    
590
			@Override
591
			public void remove() {
592
				throw new UnsupportedOperationException();
593
			}
594

    
595
		};
596

    
597
	}
598

    
599
	private Map<String, Map<String, Map<String, byte[]>>> buildTableDoaj(final List<Row> rows) throws UnsupportedEncodingException {
600
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
601

    
602
		for (final Row row : rows) {
603
			final String rowKey = row.getKey();
604
			final String cf = row.getColumnFamily();
605
			if (!table.containsKey(rowKey)) {
606
				table.put(rowKey, new HashMap<>());
607
			}
608
			if (!table.get(rowKey).containsKey(cf)) {
609
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<>());
610
			}
611
			for (final Column<String, byte[]> c : row.getColumns()) {
612
				// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
613
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
614
				if (cf.equals("result") && c.getName().equals("body")) {
615
					// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
616
					assertTrue(StringUtils.isNotBlank(new String(c.getValue(), "UTF-8")));
617
				}
618
			}
619
		}
620
		return table;
621

    
622
	}
623

    
624
	private Map<String, Map<String, Map<String, byte[]>>> buildTable(final List<Row> rows) throws UnsupportedEncodingException {
625
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
626

    
627
		for (final Row row : rows) {
628
			final String rowKey = row.getKey();
629
			final String cf = row.getColumnFamily();
630
			if (!table.containsKey(rowKey)) {
631
				table.put(rowKey, new HashMap<String, Map<String, byte[]>>());
632
			}
633
			if (!table.get(rowKey).containsKey(cf)) {
634
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<String, byte[]>());
635
			}
636
			for (final Column<String, byte[]> c : row.getColumns()) {
637
				System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
638
				table.get(rowKey).get(cf).put(c.getName(), c.getValue());
639
				if (c.getName().equals("body")) {
640
					final String theBody = new String(c.getValue(), "UTF-8");
641
					assertTrue(StringUtils.isNotBlank(theBody));
642
					//System.out.println(theBody);
643
				}
644
			}
645
		}
646
		return table;
647

    
648
	}
649

    
650
	private Map<String, XmlRecordFactory> mapAll(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
651

    
652
		final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
653
		for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
654
			map(builders, e.getKey(), e.getValue());
655
		}
656
		return builders;
657
	}
658

    
659
	// private Map<String, XmlRecordFactory> mapResultsOnly(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
660
	//
661
	// final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
662
	// for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
663
	// final Type type = OafRowKeyDecoder.decode(e.getKey()).getType();
664
	// if (type == Type.result) {
665
	// map(builders, e.getKey(), e.getValue());
666
	// }
667
	// }
668
	// return builders;
669
	// }
670

    
671
	private void map(final Map<String, XmlRecordFactory> builders, final String rowKey, final Map<String, Map<String, byte[]>> row) throws Exception {
672

    
673
		final Type type = OafRowKeyDecoder.decode(rowKey).getType();
674

    
675
		final Map<String, byte[]> familyMap = row.get(type.toString());
676

    
677
		if (familyMap == null) return;
678

    
679
		final byte[] bodyB = familyMap.get("body");
680

    
681
		if (bodyB != null) {
682
			ensureBuilder(builders, rowKey);
683

    
684
			final Oaf oaf = UpdateMerger.mergeBodyUpdates(familyMap);
685

    
686
			final OafDecoder mainEntity = OafDecoder.decode(oaf);
687

    
688
			builders.get(rowKey).setMainEntity(mainEntity);
689

    
690
			for (final LinkDescriptor ld : entityConfigTable.getDescriptors(type)) {
691

    
692
				final String it = ld.getRelDescriptor().getIt();
693
				final Map<String, byte[]> cols = row.get(it);
694

    
695
				if ((cols != null) && !cols.isEmpty()) {
696

    
697
					for (final byte[] oafB : cols.values()) {
698

    
699
						final Oaf.Builder relBuilder = Oaf.newBuilder(Oaf.parseFrom(oafB));
700

    
701
						if (ld.isSymmetric()) {
702
							final RelDescriptor rd = ld.getRelDescriptor();
703

    
704
							relBuilder.getRelBuilder().setCachedTarget(mainEntity.getEntity()).setRelType(rd.getRelType()).setSubRelType(rd.getSubRelType());
705
						}
706

    
707
						relBuilder.getRelBuilder().setChild(ld.isChild());
708

    
709
						final Oaf.Builder oafBuilder = Oaf.newBuilder().setKind(Kind.relation).setLastupdatetimestamp(System.currentTimeMillis());
710
						oafBuilder.mergeFrom(relBuilder.build());
711

    
712
						final String targetId = ld.isSymmetric() ? oafBuilder.getRel().getTarget() : oafBuilder.getRel().getSource();
713
						ensureBuilder(builders, targetId);
714
						final OafDecoder decoder = OafDecoder.decode(oafBuilder.build());
715

    
716
						if (ld.isChild()) {
717
							builders.get(targetId).addChild(type, decoder);
718
						} else {
719
							builders.get(targetId).addRelation(type, decoder);
720
						}
721
					}
722

    
723
				}
724
			}
725
		}
726

    
727
	}
728

    
729
	private void printAll(final Map<String, XmlRecordFactory> builders) throws DocumentException {
730
		print(Sets.newHashSet(Type.values()), builders, null);
731
	}
732

    
733
	private void printPerson(final Map<String, XmlRecordFactory> builders) throws DocumentException {
734
		print(Sets.newHashSet(Type.person), builders, null);
735
	}
736

    
737
	private void printPersonFullnames(final Map<String, XmlRecordFactory> builders) throws DocumentException {
738

    
739
		final Map<Type, Set<String>> xpaths = Maps.newHashMap();
740

    
741
		final Set<String> personPaths = Sets.newHashSet();
742

    
743
		personPaths.add("//fullname");
744

    
745
		xpaths.put(Type.person, personPaths);
746

    
747
		print(Sets.newHashSet(Type.person), builders, xpaths);
748
	}
749

    
750
	private void print(final Set<Type> types, final Map<String, XmlRecordFactory> builders, final Map<Type, Set<String>> xpaths) throws DocumentException {
751
		final SAXReader r = new SAXReader();
752

    
753
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
754
			final OafRowKeyDecoder kd = OafRowKeyDecoder.decode(e.getKey());
755

    
756
			if (!e.getValue().isValid()) throw new IllegalArgumentException("invalid builder: " + e.getKey());
757
			if (types.contains(kd.getType())) {
758
				final String val = IndentXmlString.apply(e.getValue().build());
759

    
760
				if ((xpaths != null) && !xpaths.isEmpty() && (xpaths.get(kd.getType()) != null)) {
761
					final Document doc = r.read(new StringReader(val));
762

    
763
					log.debug("\n" + e.getKey());
764
					for (final String xpath : xpaths.get(kd.getType())) {
765
						log.debug(doc.valueOf(xpath));
766
					}
767
				} else {
768
					log.info(val);
769
				}
770
			}
771
		}
772
	}
773

    
774
	private void printNoIndent(final Map<String, XmlRecordFactory> builders) {
775
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
776
			if (e.getValue().isValid()) {
777
				log.debug(e.getValue().build());
778
			} else {
779
				log.debug("invalid builder: " + e.getKey());
780
			}
781
		}
782
	}
783

    
784
	private void ensureBuilder(final Map<String, XmlRecordFactory> builders, final String rowKey) throws Exception {
785
		if (!builders.containsKey(rowKey)) {
786
			builders.put(rowKey, newBuilder());
787
		}
788
	}
789

    
790
	private XmlRecordFactory newBuilder() throws TransformerConfigurationException, TransformerFactoryConfigurationError, DocumentException {
791
		return new XmlRecordFactory(entityConfigTable, ContextMapper.fromXml(Context.xml),
792
				RelClasses.fromJSon(RelClassesTest.relClassesJson), XmlRecordFactoryTest.SCHEMA_LOCATION, true, false, false, XmlRecordFactoryTest.specialDatasourceTypes);
793
	}
794

    
795
	private InputStream load(final String fileName) {
796
		return getClass().getResourceAsStream(fileName);
797
	}
798

    
799
	private InputStream loadFromTransformationProfile(final String profilePath) {
800
		log.info("Loading xslt from: " + basePathProfiles + profilePath);
801
		InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath);
802
		SAXReader saxReader = new SAXReader();
803
		Document doc = null;
804
		try {
805
			doc = saxReader.read(profile);
806
		} catch (DocumentException e) {
807
			e.printStackTrace();
808
			throw new RuntimeException(e);
809
		}
810
		String xslt = doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']").asXML();
811
		return IOUtils.toInputStream(xslt);
812
	}
813

    
814
}
    (1-1/1)