Project

General

Profile

1
package eu.dnetlib.data.transform;
2

    
3
import com.google.common.collect.Lists;
4
import com.google.common.collect.Maps;
5
import com.googlecode.protobuf.format.JsonFormat;
6
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
7
import eu.dnetlib.data.mapreduce.util.dao.HBaseTableDAO;
8
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
9
import eu.dnetlib.data.proto.TypeProtos.Type;
10

    
11
import eu.dnetlib.data.proto.dli.Scholix2ObjectProtos;
12
import eu.dnetlib.dli.proto.DNGF2Scholix2Converter;
13
import eu.dnetlib.dli.proto.DNGFDLISummaryConverter;
14
import org.apache.commons.lang3.StringUtils;
15
import org.apache.commons.logging.Log;
16
import org.apache.commons.logging.LogFactory;
17
import org.junit.Before;
18
import org.junit.Test;
19

    
20
import java.util.Iterator;
21
import java.util.List;
22
import java.util.Map;
23
import java.util.Map.Entry;
24

    
25
import static eu.dnetlib.data.graph.utils.RelDescriptor.QUALIFIER_SEPARATOR;
26
import static eu.dnetlib.data.proto.dli.Scholix2ObjectProtos.*;
27

    
28
/**
29
 * Created by sandro on 2/13/17.
30
 */
31
public class DLIConvertertTest extends AbstractTransformerTest {
32

    
33
	private static final Log log = LogFactory.getLog(DLIConvertertTest.class);
34

    
35
	private Ontologies ontologies;
36

    
37
    @Before
38
    public void setUp() throws Exception {
39
        factory = new XsltRowTransformerFactory();
40
        ontologies = OntologyLoader.loadOntologiesFromCp();
41
    }
42

    
43
    @Test
44
    public void testLinkPangaeaDLI() throws Exception {
45

    
46
        final List<Row> rows = Lists.newArrayList();
47
        rows.addAll(asRows(loadFromTransformationProfile("dmfdli2hbase.xml"), load("record_dli_dmf.xml")));
48
        rows.addAll(asRows(loadFromTransformationProfile("dmfdli2hbase.xml"), load("record_dli_dmf2.xml")));
49
//        rows.addAll(asRows(loadFromTransformationProfile("pmfdli2hbase.xml"), load("record_dli_pmf.xml")));
50
	    printAllSummary(mapAllSummary(buildTable(rows)));
51
    }
52

    
53
	@Test
54
	public void testLinkPangaeaDLIScholix() throws Exception {
55

    
56
		final List<Row> rows = Lists.newArrayList();
57
		rows.addAll(asRows(loadFromTransformationProfile("dmfdli2hbase.xml"), load("record_dli_dmf2.xml")));
58
		rows.addAll(asRows(loadFromTransformationProfile("dmfdli2hbase.xml"), load("record_dli_dmf.xml")));
59
//		rows.forEach(row ->
60
//				row.getColumns().forEach(
61
//						result -> {
62
//							if (result != null) {
63
//
64
//
65
//								final DNGFDecoder decoder =
66
//										DNGFDecoder.decode(result.getValue(), DliFieldTypeProtos.completionStatus, DliProtos.completionStatus, DliProtos.resolvedfrom, DliProtos.typedIdentifier);
67
//								System.out.println(row.getKey());
68
//								Put put = HBaseTableDAO.asPutByCollectedFrom(decoder.getDNGF());
69
//								System.out.println("put.getTimeStamp() = " + put.getTimeStamp());
70
//
71
//								System.out.println("decoder.getDNGF().toString() = " + decoder.getDNGF().toString());
72
//							}
73
//						}
74
//				));
75

    
76

    
77

    
78

    
79
//		//rows.addAll(asRows(loadFromTransformationProfile("pmfdli2hbase.xml"), load("record_dli_pmf.xml")));
80
		reduceScholix(mapAllScholix(buildTable(rows)));
81
	}
82

    
83
	private void reduceScholix(final Map<String, List<Scholix>> scholix) {
84
    	scholix.forEach((id, list) -> {
85
			    final Iterator<Scholix> it = list.iterator();
86
			    final Scholix source = it.next();
87
			    if (source.hasSource()) {
88
				    it.forEachRemaining(s -> log.info(JsonFormat.printToString(
89
						    Scholix.newBuilder(s)
90
								    .setSource(source.getSource())
91
								    .build())));
92
			    } else {
93
			    	log.warn("missing source in scholix: " + id);
94
			    }
95
		    }
96
	    );
97
    }
98

    
99
	private void printAllSummary(final Map<String, DNGFDLISummaryConverter> converters) {
100
		converters.forEach((id, converter) -> log.info(JsonFormat.printToString(converter.convert())));
101
	}
102

    
103
	private void printAllScholix(final Map<String, List<Scholix>> converters) {
104
		converters.values().forEach(converter -> converter.forEach(s -> log.info(JsonFormat.printToString(s))));
105
	}
106

    
107
	private Map<String, DNGFDLISummaryConverter> mapAllSummary(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
108
		final Map<String, DNGFDLISummaryConverter> builders = Maps.newHashMap();
109
		for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
110
			mapSummary(builders, e.getKey(), e.getValue());
111
	    }
112
	    return builders;
113
    }
114

    
115
	private Map<String, List<Scholix>> mapAllScholix(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
116
		final Map<String, List<Scholix>> builders = Maps.newHashMap();
117
		for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
118
			mapScholix(builders, e.getKey(), e.getValue());
119
		}
120
		return builders;
121
	}
122

    
123
	private void mapScholix(final Map<String, List<Scholix>> builders, final String rowKey, final Map<String, Map<String, byte[]>> row) {
124

    
125
		final Type type = DNGFRowKeyDecoder.decode(rowKey).getType();
126
		final Map<String, byte[]> familyMap = row.get("metadata");
127

    
128
		if (familyMap == null) return;
129

    
130
		final byte[] bodyB = familyMap.get(type.toString());
131

    
132
		if (bodyB != null) {
133
			ensureScholixBuilder(builders, rowKey);
134
			final DNGF entity = HBaseTableDAO.parseProto(familyMap.get(type.toString()));
135

    
136
			final Scholix.Builder source = DNGF2Scholix2Converter.withSource(entity.getEntity());
137
			builders.get(rowKey).add(source.build());
138

    
139
			final Map<String, byte[]> rels = row.get("rels");
140
			rels.forEach((q, v) -> {
141
				final DNGF.Builder r = DNGF.newBuilder(HBaseTableDAO.parseProto(v));
142
                final Scholix.Builder target = DNGF2Scholix2Converter.withTarget(entity.getEntity(), r.getRel());
143
                ensureScholixBuilder(builders, r.getRel().getTarget());
144
				builders.get(r.getRel().getTarget()).add(target.build());
145
			});
146
		}
147
	}
148

    
149
	private void mapSummary(final Map<String, DNGFDLISummaryConverter> converters, final String rowKey, final Map<String, Map<String, byte[]>> row) throws Exception {
150

    
151
		final Type type = DNGFRowKeyDecoder.decode(rowKey).getType();
152

    
153
		final Map<String, byte[]> familyMap = row.get("metadata");
154

    
155
		if (familyMap == null) return;
156

    
157
		final byte[] bodyB = familyMap.get(type.toString());
158

    
159
		if (bodyB != null) {
160
			ensureSummaryBuilder(converters, rowKey);
161

    
162
			final DNGF mainEntity = HBaseTableDAO.parseProto(familyMap.get(type.toString()));
163
			converters.get(rowKey).setMainEntity(mainEntity);
164

    
165
			final Map<String, byte[]> rels = row.get("rels");
166

    
167
			rels.forEach((q, v) -> {
168

    
169
				final DNGF.Builder rel = DNGF.newBuilder(HBaseTableDAO.parseProto(v));
170
				rel.getRelBuilder().setCachedTarget(mainEntity.getEntity());
171

    
172
				final String source = rel.getRel().getSource();
173
				final Type sourceType = rel.getRel().getSourceType();
174

    
175
				rel.getRelBuilder().setSource(rel.getRel().getTarget());
176
				rel.getRelBuilder().setSourceType(rel.getRel().getTargetType());
177

    
178
				rel.getRelBuilder().setTarget(source);
179
				rel.getRelBuilder().setTargetType(sourceType);
180

    
181
				final Ontology o = ontologies.get(rel.getRel().getRelType().getSchemeid());
182
				final String inverse = o.inverseOf(rel.getRel().getRelType().getClassid());
183
				rel.getRelBuilder().getRelTypeBuilder().setClassid(inverse).setClassname(inverse);
184

    
185
				final String targetId = StringUtils.substringAfter(q, QUALIFIER_SEPARATOR);
186

    
187
				ensureSummaryBuilder(converters, targetId);
188
				converters.get(targetId).addRelation(rel.build());
189
			});
190
		}
191

    
192
	}
193

    
194
	private void ensureSummaryBuilder(final Map<String, DNGFDLISummaryConverter> builders, final String rowKey) {
195
		if (!builders.containsKey(rowKey)) {
196
			builders.put(rowKey, new DNGFDLISummaryConverter());
197
		}
198
	}
199

    
200
	private void ensureScholixBuilder(final Map<String, List<Scholix>> builders, final String rowKey) {
201
		if (!builders.containsKey(rowKey)) {
202
			builders.put(rowKey, Lists.newLinkedList());
203
		}
204
	}
205

    
206
}
(2-2/5)