Project

General

Profile

1
package eu.dnetlib.data.mapreduce.util;
2

    
3
import com.google.common.base.Splitter;
4
import com.google.common.collect.Sets;
5
import com.google.gson.JsonArray;
6
import com.google.gson.JsonElement;
7
import com.google.gson.JsonObject;
8
import com.google.protobuf.Descriptors;
9
import com.google.protobuf.GeneratedMessage;
10
import eu.dnetlib.data.graph.model.DNGFDecoder;
11
import eu.dnetlib.data.graph.model.DNGFEntityDecoder;
12
import eu.dnetlib.data.graph.model.DNGFRelDecoder;
13
import eu.dnetlib.data.graph.utils.RelDescriptor;
14
import eu.dnetlib.data.mapreduce.hbase.index.config.ContextDef;
15
import eu.dnetlib.data.mapreduce.hbase.index.config.ContextMapper;
16
import eu.dnetlib.data.mapreduce.hbase.index.config.EntityConfigTable;
17
import eu.dnetlib.data.mapreduce.hbase.index.config.LinkDescriptor;
18
import eu.dnetlib.data.proto.*;
19
import eu.dnetlib.data.transform.Ontologies;
20
import org.apache.commons.lang3.StringUtils;
21

    
22
import javax.xml.transform.TransformerException;
23
import java.util.List;
24
import java.util.Map;
25
import java.util.Set;
26

    
27
/**
28
 * Created by sandro on 1/9/17.
29
 */
30
public class JSONRecordFactory extends AbstractRecordFactory {
31

    
32
    /**
     * Creates a record factory that serializes graph entities as JSON.
     * <p>
     * Every argument is forwarded unchanged to the {@link AbstractRecordFactory} constructor.
     *
     * @param entityConfigTable configuration table handed to the superclass
     * @param contextMapper     context definitions handed to the superclass
     * @param ontologies        ontologies handed to the superclass
     * @param entityDefaults    entity-defaults flag handed to the superclass
     * @param relDefaults       relation-defaults flag handed to the superclass
     * @param childDefaults     child-defaults flag handed to the superclass
     */
    public JSONRecordFactory(
            final EntityConfigTable entityConfigTable,
            final ContextMapper contextMapper,
            final Ontologies ontologies,
            final boolean entityDefaults,
            final boolean relDefaults,
            final boolean childDefaults) {
        super(entityConfigTable, contextMapper, ontologies, entityDefaults, relDefaults, childDefaults);
    }
35

    
36
    @Override
37
    public String build() {
38
        try {
39
            final DNGFEntityDecoder entity = mainEntity.decodeEntity();
40

    
41
            final JsonObject result = new JsonObject();
42

    
43
            final TypeProtos.Type type = entity.getType();
44

    
45

    
46
            // rels has to be processed before the contexts because they enrich the contextMap with the funding info.
47
            final JsonArray rels = listRelations();
48
            result.add("rels", rels);
49
            buildContexts(type, result);
50

    
51

    
52
//          metadata.add(parseDataInfo(mainEntity));
53
//
54
//          final String body = templateFactory.buildBody(type, metadata, rels, listChildren(), extraInfo);
55

    
56
            result.add("metadata", decodeType(entity, null, entityDefaults, false));
57

    
58
            return result.toString();
59

    
60
        } catch (final Throwable e) {
61
            throw new RuntimeException(String.format("error building record '%s'", this.key), e);
62
        }
63
    }
64

    
65
    private JsonObject decodeType(final DNGFEntityDecoder decoder, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
66

    
67
        final JsonObject root = new JsonObject();
68
        addFields(root, decoder.getMetadata(), filter, defaults, expandingRel);
69
        if ((decoder.getEntity() instanceof PublicationProtos.Publication) && !expandingRel) {
70
            asJsonElement("bestlicense", "", getBestLicense(), null, root);
71
            addFields(root, decoder.getEntity(), filter, defaults, expandingRel);
72
        }
73
        if ((decoder.getEntity() instanceof PersonProtos.Person) && !expandingRel) {
74
            addFields(root, decoder.getEntity(), filter, defaults, expandingRel);
75
        }
76
        if ((decoder.getEntity() instanceof ProjectProtos.Project) && !expandingRel) {
77
            addFields(root, decoder.getEntity(), filter, defaults, expandingRel);
78
        }
79

    
80
        return root;
81
    }
82

    
83

    
84
    private void addFields(final JsonObject metadata, final GeneratedMessage fields, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
85

    
86
        if (fields != null) {
87
            final Set<String> seen = Sets.newHashSet();
88
            for (final Map.Entry<Descriptors.FieldDescriptor, Object> e : filterFields(fields, filter)) {
89
                final String name = e.getKey().getName();
90
                seen.add(name);
91
                addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
92
            }
93
            if (defaults) {
94
                for (final Descriptors.FieldDescriptor fd : fields.getDescriptorForType().getFields()) {
95
                    if (!seen.contains(fd.getName())) {
96
                        addFieldValue(metadata, fd, getDefault(fd), expandingRel);
97
                    }
98
                }
99
            }
100
        }
101

    
102
    }
103

    
104

    
105
    private void addFieldValue(final JsonObject metadata, final Descriptors.FieldDescriptor fd, final Object value, final boolean expandingRel) {
106
        if ("dateofcollection".equals(fd.getName()) ||
107
                "dateoftransformation".equals(fd.getName()) ||
108
                "id".equals(fd.getName()) ||
109
                (value == null)) return;
110

    
111
        if (fd.getName().equals("datasourcetype")) {
112
            final String classid = ((FieldTypeProtos.Qualifier) value).getClassid();
113

    
114
            final FieldTypeProtos.Qualifier.Builder q = FieldTypeProtos.Qualifier.newBuilder((FieldTypeProtos.Qualifier) value);
115
            if (specialDatasourceTypes.contains(classid)) {
116
                q.setClassid("other").setClassname("other");
117
            }
118

    
119
            asJsonElement("datasourcetypeui", "", q.build(), null, metadata);
120
        }
121

    
122
        if (fd.isRepeated() && (value instanceof List<?>)) {
123
            for (final Object o : (List<Object>) value) {
124
                guessType(metadata, fd, o, expandingRel);
125
            }
126
        } else {
127
            guessType(metadata, fd, value, expandingRel);
128
        }
129
    }
130

    
131
    private void guessType(JsonObject metadata, Descriptors.FieldDescriptor fd, Object object, boolean expandingRel) {
132

    
133
        if (fd.getType().equals(Descriptors.FieldDescriptor.Type.MESSAGE)) {
134

    
135
            if (FieldTypeProtos.Qualifier.getDescriptor().equals(fd.getMessageType())) {
136
                final FieldTypeProtos.Qualifier qualifier = (FieldTypeProtos.Qualifier) object;
137
                asJsonElement(fd.getName(), "", qualifier, null, metadata);
138
            } else if (FieldTypeProtos.StructuredProperty.getDescriptor().equals(fd.getMessageType())) {
139
                final FieldTypeProtos.StructuredProperty sp = (FieldTypeProtos.StructuredProperty) object;
140
                asJsonElement(fd.getName(), sp.getValue(), sp.getQualifier(), sp.hasDataInfo() ? sp.getDataInfo() : null, metadata);
141

    
142
                if (!expandingRel && fd.getName().equals("pid")) {
143
                    if (sp.getQualifier().getClassid().equalsIgnoreCase("doi")) {
144
                        incrementCounter("doi");
145
                    }
146
                }
147
            } else if (FieldTypeProtos.KeyValue.getDescriptor().equals(fd.getMessageType())) {
148
                final FieldTypeProtos.KeyValue kv = (FieldTypeProtos.KeyValue) object;
149

    
150
                JsonObject tmp = new JsonObject();
151
                tmp.addProperty("name", kv.getValue());
152
                tmp.addProperty("id", removePrefix(kv.getValue()));
153
                metadata.add(fd.getName(), tmp);
154

    
155
            } else if (FieldTypeProtos.StringField.getDescriptor().equals(fd.getMessageType())) {
156
                final String fieldName = fd.getName();
157
                final FieldTypeProtos.StringField sf = (FieldTypeProtos.StringField) object;
158
                final JsonObject sb = new JsonObject();
159
                if (sf.hasDataInfo()) {
160
                    final FieldTypeProtos.DataInfo dataInfo = sf.getDataInfo();
161
                    dataInfoAsAttributes(sb, dataInfo);
162
                    if (!StringUtils.isEmpty(sf.getValue()))
163
                        sb.addProperty("value", sf.getValue());
164
                    if (sb.entrySet().size() > 0)
165
                        metadata.add(fieldName, sb);
166
                } else {
167
                    if (!StringUtils.isEmpty(sf.getValue()))
168
                        metadata.addProperty(fd.getName(), sf.getValue());
169
                }
170
            } else if (FieldTypeProtos.BoolField.getDescriptor().equals(fd.getMessageType())) {
171
                final FieldTypeProtos.BoolField bf = (FieldTypeProtos.BoolField) object;
172
                if (bf.hasDataInfo()) {
173
                    final JsonObject sb = new JsonObject();
174
                    final FieldTypeProtos.DataInfo dataInfo = bf.getDataInfo();
175
                    dataInfoAsAttributes(sb, dataInfo);
176
                    sb.addProperty("value", bf.getValue());
177
                    metadata.add(fd.getName(), sb);
178
                } else
179
                    metadata.addProperty(fd.getName(), bf.getValue());
180

    
181
            } else if (FieldTypeProtos.Journal.getDescriptor().equals(fd.getMessageType()) && (object != null)) {
182
                final FieldTypeProtos.Journal j = (FieldTypeProtos.Journal) object;
183
                JsonObject journal = new JsonObject();
184
                journal.addProperty("issn", j.getIssnPrinted());
185
                journal.addProperty("eissn", j.getIssnOnline());
186
                journal.addProperty("lissn", j.getIssnLinking());
187
                journal.addProperty("value", j.getName());
188
                metadata.add("journal", journal);
189
            } else if (FieldTypeProtos.Context.getDescriptor().equals(fd.getMessageType()) && (object != null)) {
190
                contextes.add(((FieldTypeProtos.Context) object).getId());
191
            } else if (FieldTypeProtos.ExtraInfo.getDescriptor().equals(fd.getMessageType()) && (object != null)) {
192
                final FieldTypeProtos.ExtraInfo e = (FieldTypeProtos.ExtraInfo) object;
193
                final JsonObject sb = new JsonObject();
194
                sb.addProperty("name", e.getName());
195
                sb.addProperty("typology", e.getTypology());
196
                sb.addProperty("provenance", e.getProvenance());
197
                sb.addProperty("trust", e.getTrust());
198
                sb.addProperty("value", e.getValue());
199
                metadata.add(fd.getName(), sb);
200
            }
201

    
202

    
203
        } else if (fd.getType().equals(Descriptors.FieldDescriptor.Type.ENUM)) {
204
            if (fd.getFullName().equals("eu.dnetlib.data.proto.DNGFEntity.type")) return;
205
            asJsonElement(fd.getName(), ((Descriptors.EnumValueDescriptor) object).getName(), null, null, metadata);
206
        } else {
207
            asJsonElement(fd.getName(), object.toString(), null, null, metadata);
208
        }
209

    
210
    }
211

    
212

    
213
    private JsonArray listRelations() {
214

    
215
        JsonArray relations = new JsonArray();
216

    
217
        for (final DNGFDecoder decoder : this.relations) {
218

    
219
            JsonObject root = new JsonObject();
220

    
221

    
222
            final DNGFProtos.DNGFRel rel = decoder.getDNGFRel();
223
            final DNGFProtos.DNGFEntity cachedTarget = rel.getCachedTarget();
224
            final DNGFRelDecoder relDecoder = DNGFRelDecoder.decode(rel);
225

    
226
            // if (!relDecoder.getRelType().equals(RelType.personResult) || relDecoder.getRelTargetId().equals(key)) {
227
            if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
228

    
229

    
230
                final TypeProtos.Type targetType = relDecoder.getTargetType();
231
                final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
232

    
233

    
234
                addFields(root, relDecoder.getDngfRel(), relFilter, false, true);
235

    
236

    
237
                final RelDescriptor relDescriptor = relDecoder.getRelDescriptor();
238

    
239
                if ((cachedTarget != null) && cachedTarget.isInitialized()) {
240

    
241
                    final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
242
                    root.add("cachedTarget", decodeType(DNGFEntityDecoder.decode(cachedTarget), filter, relDefaults, true));
243
                }
244

    
245
                final String semanticclass = ontologies.inverseOf(relDescriptor);
246
                final String semanticscheme = relDescriptor.getOntologyCode();
247

    
248
                final String rd = semanticscheme + "_" + semanticclass;
249
                incrementCounter(rd);
250

    
251
                final FieldTypeProtos.DataInfo info = decoder.getDNGF().getDataInfo();
252

    
253
                manageInferred(rd, info);
254

    
255
                final LinkDescriptor ld = entityConfigTable.getDescriptor(relDecoder.getTargetType(), relDescriptor);
256

    
257
                final String relId = (ld != null) && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
258

    
259
                relations.add(root);
260

    
261
//                rels.add(templateFactory.getRel(targetType, relId, metadata, semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
262
//                        info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
263
            }
264
        }
265
        return relations;
266
    }
267

    
268

    
269
    private void buildContexts(final TypeProtos.Type type, final JsonObject metadata) {
270

    
271
        if ((contextMapper != null) && !contextMapper.isEmpty() && type.equals(TypeProtos.Type.publication)) {
272
            JsonObject contextRoot = new JsonObject();
273
            JsonArray contexts = new JsonArray();
274
            contextRoot.add("contextRoot", contexts);
275

    
276
            for (final String context : contextes) {
277

    
278
                String id = "";
279
                for (final String token : Splitter.on("::").split(context)) {
280
                    id += token;
281

    
282
                    final ContextDef def = contextMapper.get(id);
283

    
284
                    if (def == null)
285
                        throw new IllegalStateException(String.format("cannot find context for id '%s'", id));
286

    
287
                    if (def.getName().equals("context")) {
288
                        if (!findExistingContext(contexts,def.getId()))
289
                            addContextDef(contexts,def);
290
                    }
291

    
292
                    if (def.getName().equals("category")) {
293
                        final String rootId = StringUtils.substringBefore(def.getId(), "::");
294
                        addContextDef(contexts, def);
295
                    }
296

    
297
                    if (def.getName().equals("concept")) {
298
                        addContextDef(contexts, def);
299
                    }
300
                    id += "::";
301
                }
302
            }
303

    
304
        }
305
    }
306

    
307
    private boolean findExistingContext(JsonArray contexes, String id) {
308
        for(JsonElement item:contexes) {
309
            if(item.getAsJsonObject().has("context")) {
310
                if (item.getAsJsonObject().get("context").getAsJsonObject().get("id").getAsString().equals(id)) {
311
                    return true;
312
                }
313
            }
314
        }
315
        return false;
316

    
317
    }
318

    
319

    
320
    private void addContextDef(final JsonArray tags, final ContextDef def) {
321

    
322
        final JsonObject defContext = new JsonObject();
323
        defContext.addProperty("id",def.getId());
324
        defContext.addProperty("label", def.getLabel());
325
        if ((def.getType() != null) && !def.getType().isEmpty()) {
326
            defContext.addProperty("type", def.getType());
327
        }
328
        final JsonObject result = new JsonObject();
329
        result.add(def.getName(),defContext);
330
        tags.add(result);
331
    }
332

    
333

    
334
    private void asJsonElement(final String name, final String value, final FieldTypeProtos.Qualifier q, final FieldTypeProtos.DataInfo dataInfo, final JsonObject metadata) {
335

    
336
        JsonObject root = new JsonObject();
337
        getAttributes(q, root);
338
        if (dataInfo != null) {
339
            dataInfoAsAttributes(root, dataInfo);
340
        }
341
        if (value != null && !StringUtils.isEmpty(value))
342
            root.addProperty("value", value);
343

    
344
        if (root.entrySet().size() > 0)
345
            metadata.add(name, root);
346

    
347
    }
348

    
349
    private void getAttributes(final FieldTypeProtos.Qualifier q, JsonObject root) {
350
        if (q == null) return;
351
        q.getAllFields().entrySet().forEach(e -> root.addProperty(e.getKey().getName(), e.getValue().toString()));
352

    
353

    
354
    }
355

    
356
    private void dataInfoAsAttributes(final JsonObject root, final FieldTypeProtos.DataInfo dataInfo) {
357
        root.addProperty("inferred", dataInfo.getInferred());
358
        root.addProperty("inferenceprovenance", dataInfo.getInferenceprovenance());
359
        root.addProperty("provenanceaction", dataInfo.getProvenanceaction().getClassid());
360
        root.addProperty("trust", dataInfo.getTrust());
361

    
362
    }
363

    
364

    
365
}
(2-2/7)