Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2

    
3
import java.util.ArrayList;
4
import java.util.List;
5
import java.util.Map;
6
import java.util.Objects;
7

    
8
import com.google.gson.JsonArray;
9
import com.google.gson.JsonElement;
10
import com.google.gson.JsonObject;
11
import com.googlecode.protobuf.format.JsonFormat;
12
import eu.dnetlib.actionmanager.actions.ActionFactory;
13
import eu.dnetlib.actionmanager.actions.AtomicAction;
14
import eu.dnetlib.actionmanager.common.Agent;
15
import eu.dnetlib.data.mapreduce.util.StreamUtils;
16
import eu.dnetlib.data.proto.RelMetadataProtos;
17
import eu.dnetlib.data.proto.RelTypeProtos;
18
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
19
import org.apache.commons.lang3.StringUtils;
20

    
21
import static eu.dnetlib.data.proto.FieldTypeProtos.*;
22
import static eu.dnetlib.data.proto.KindProtos.Kind;
23
import static eu.dnetlib.data.proto.OafProtos.*;
24
import static eu.dnetlib.data.proto.ResultProtos.Result;
25
import static eu.dnetlib.data.proto.ResultProtos.Result.*;
26
import static eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
27
import static eu.dnetlib.data.proto.TypeProtos.Type;
28

    
29
public class ScholixToActions {
30

    
31
    public static List<AtomicAction> generateActionsFromScholix(final JsonObject rootElement, final Map<String, ScholExplorerConfiguration> conf,
32
                                                                final String setName, final Agent agent, ActionFactory factory, String nsPrefix, final String dsName,
33
                                                                final String dsId, String dateOfCollection) {
34

    
35
        final List<AtomicAction> actions = new ArrayList<>();
36

    
37
        final String typology = getStringValue(rootElement, "typology");
38
        final List<String> publisher = getArrayValues(rootElement, "publisher");
39
        final String abstractValue = getStringValue(rootElement, "abstract");
40
        final List<String> authors = getArrayValues(rootElement, "author");
41
        final List<String> dates = getArrayValues(rootElement, "date");
42

    
43
        final JsonArray localIdentifiers = rootElement.getAsJsonArray("localIdentifier");
44
        final String dnetId = getStringValue(rootElement, "id").substring(17);
45

    
46
        String title = "";
47
        if (rootElement.has("title") && rootElement.get("title").isJsonArray()) {
48
            StringBuilder ttl = new StringBuilder();
49
            getArrayValues(rootElement, "title").forEach(ttl::append);
50
            title = ttl.toString();
51
        } else {
52
            title = getStringValue(rootElement, "title");
53
        }
54

    
55
        if (title != null && title.charAt(0) == '"' && title.charAt(title.length() - 1) == '"') {
56
            title = title.substring(1, title.length() - 1);
57
        }
58

    
59
        final Oaf.Builder oafBuilder = Oaf.newBuilder();
60

    
61
        final boolean isVisible = StringUtils.isNotBlank(title) && StreamUtils.toStream(localIdentifiers.iterator())
62
                .map(JsonElement::getAsJsonObject)
63
                .anyMatch(o -> {
64
                    final String type = getStringValue(o, "type");
65
                    return StringUtils.isNotBlank(type) && conf.containsKey(type) && conf.get(type).isVisible();
66
                });
67
        oafBuilder.setDataInfo(
68
                DataInfo.newBuilder()
69
                        .setInvisible(!isVisible)
70
                        .setDeletedbyinference(false)
71
                        .setInferred(false)
72
                        .setTrust("0.9")
73
                        .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
74
                        .build());
75
        oafBuilder.setKind(Kind.entity);
76
        final String sourceId = String.format("50|%s::%s", nsPrefix, dnetId);
77
        final KeyValue collectedFrom = KeyValue.newBuilder()
78
                .setValue(dsName)
79
                .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(dsId))
80
                .build();
81
        final OafEntity.Builder oafEntityBuilder = OafEntity.newBuilder()
82
                .setType(Type.result)
83
                .setDateofcollection(dateOfCollection)
84
                .addCollectedfrom(collectedFrom)
85
                .setId(sourceId);
86

    
87
        StreamUtils.toStream(localIdentifiers.iterator())
88
                .map(JsonElement::getAsJsonObject)
89
                .map(localIdentifier -> getPid(localIdentifier, conf))
90
                .filter(Objects::nonNull)
91
                .forEach(oafEntityBuilder::addPid);
92

    
93
        final Result.Builder result = Result.newBuilder();
94

    
95
        final Metadata.Builder metadata = Metadata.newBuilder()
96
            .setResulttype(getQualifier(typology, "dnet:result_typologies"))
97
            .setLanguage(Qualifier.newBuilder()
98
                .setClassid("und")
99
                .setClassname("Undetermined")
100
                .setSchemeid("dent:languages")
101
                .setSchemename("dent:languages")
102
                .build());
103
        if (StringUtils.isNotBlank(title)) {
104
            metadata.addTitle(StructuredProperty.newBuilder()
105
                    .setValue(title)
106
                    .setQualifier(getQualifier("main title", "dnet:dataCite_title"))
107
                    .build());
108
        }
109
        if (publisher.size() > 0)
110
            metadata.setPublisher(StringField.newBuilder().setValue(publisher.get(0)).build());
111
        if (StringUtils.isNotEmpty(abstractValue)) {
112
            metadata.addDescription(StringField.newBuilder().setValue(abstractValue).build());
113
        }
114
        dates.stream().map(it -> {
115
           if (it.length() == 4) {
116
               return it+"-01-01";
117
           }
118
           else if (it.length() > 10) {
119
               return it.substring(0,10);
120
           }
121
           else
122
               return it;
123
        }).forEach(it -> metadata.addRelevantdate(StructuredProperty.newBuilder()
124
                .setValue(it)
125
                .setQualifier(getQualifier("dnet:date", "dnet:date"))
126
                .build()));
127

    
128
        if (rootElement.has("subject")) {
129
            JsonArray subject = rootElement.getAsJsonArray("subject");
130
            subject.forEach(it -> {
131
                    final JsonObject item = it.getAsJsonObject();
132
                    final String scheme = getStringValue(item, "scheme");
133
                    metadata.addSubject(StructuredProperty.newBuilder()
134
                            .setValue(Objects.requireNonNull(getStringValue(item, "value")))
135
                            .setQualifier(getQualifier(scheme, "dnet:subject"))
136
                            .build());
137
                }
138
            );
139
        }
140
        int i = 1;
141
        for (String it : authors) {
142
            metadata.addAuthor(Author.newBuilder()
143
                    .setFullname(it)
144
                    .setRank(i++)
145
                    .build());
146
        }
147
        result.setMetadata(metadata.build());
148

    
149
        localIdentifiers.forEach(it -> {
150

    
151
            final JsonObject localIdentifier = it.getAsJsonObject();
152
            final String pidType = getStringValue(localIdentifier, "type");
153
            final ScholExplorerConfiguration currentConf = conf.get(pidType);
154
            if (currentConf.getGeneratedUrl() != null) {
155
                final Instance.Builder instance = Instance.newBuilder();
156
                final String pidValue = getStringValue(localIdentifier, "id");
157
                instance.addUrl(String.format(currentConf.getGeneratedUrl(), pidValue));
158
                instance.setAccessright(Qualifier.newBuilder()
159
                        .setClassid("UNKNOWN")
160
                        .setClassname("not available")
161
                        .setSchemeid("dnet:access_modes")
162
                        .setSchemename("dnet:access_modes")
163
                        .build());
164

    
165
                instance.setInstancetype(Qualifier.newBuilder()
166
                        .setClassid("0000")
167
                        .setClassname("Unknown")
168
                        .setSchemeid("dnet:publication_resource")
169
                        .setSchemename("dnet:publication_resource")
170
                        .build());
171
                instance.setHostedby(KeyValue.newBuilder()
172
                        .setKey("10|openaire____::55045bd2a65019fd8e6741a755395c8c")
173
                        .setValue("Unknown Repository")
174
                        .build());
175

    
176
                instance.setCollectedfrom(collectedFrom);
177
                result.addInstance(instance);
178
            }
179
        });
180

    
181
        generateExternalReference(extractRelations(rootElement, "externalRels"))
182
                .forEach(result::addExternalReference);
183
        oafEntityBuilder.setResult(result.build());
184
        oafBuilder.setEntity(oafEntityBuilder.build());
185

    
186
        System.out.println(JsonFormat.printToString(oafBuilder.build()));
187

    
188
        actions.add(factory.createAtomicAction(setName, agent, oafEntityBuilder.getId(), "result", "body", oafBuilder.build().toByteArray()));
189

    
190
        final List<JsonObject> doiRels = extractRelations(rootElement, "doiRels");
191
        doiRels.stream().map(it -> convertDoiRelations(it, factory, sourceId, nsPrefix, collectedFrom, setName, agent)).forEach(actions::addAll);
192
        return actions;
193
    }
194

    
195

    
196
    private static AtomicAction createResultResultRelation(final String source, final String target,
197
                                                           final KeyValue collectedFrom, final ResultResult resultResultRel, final String relClass, final String cfRelation, final ActionFactory factory, final String setName, final Agent agent) {
198
        final Oaf.Builder oaf = Oaf.newBuilder();
199
        oaf.setDataInfo(
200
                DataInfo.newBuilder()
201
                        .setDeletedbyinference(false)
202
                        .setInferred(false)
203
                        .setTrust("0.9")
204
                        .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
205
                        .build());
206
        oaf.setKind(Kind.relation);
207
        final OafRel.Builder relation = OafRel.newBuilder();
208
        relation.setSource(source);
209
        relation.setTarget(target);
210
        relation.setRelType(RelTypeProtos.RelType.resultResult);
211
        relation.setSubRelType(RelTypeProtos.SubRelType.publicationDataset);
212
        relation.setChild(false);
213
        relation.setResultResult(resultResultRel);
214
        relation.setRelClass(relClass);
215
        relation.addCollectedfrom(collectedFrom);
216
        oaf.setRel(relation.build());
217

    
218
        //System.out.println(JsonFormat.printToString(oaf.build()));
219
        return factory.createAtomicAction(setName, agent, source, cfRelation, target, oaf.build().toByteArray());
220
    }
221

    
222

    
223
    private static List<AtomicAction> convertDoiRelations(final JsonObject doiRel, final ActionFactory factory, final String sourceId, final String nsPrefix, final KeyValue collectedFrom, final String setName, final Agent agent) {
224
        final String target = Objects.requireNonNull(getStringValue(doiRel, "dnetId")).substring(17);
225
        final String targetId = String.format("50|%s::%s", nsPrefix, target);
226
        final String relationSemantic = getStringValue(doiRel, "relationSemantic");
227
        String cfRelation;
228
        String cfInverseRelation;
229
        ResultResult.Builder resultRel = ResultResult.newBuilder();
230
        ResultResult.Builder resultInverseRel = ResultResult.newBuilder();
231
        String relClass;
232
        String inverseRelClass;
233

    
234
        switch (relationSemantic) {
235
            case "isSupplementedBy": {
236
                cfRelation = "resultResult_supplement_isSupplementedBy";
237
                cfInverseRelation = "resultResult_supplement_isSupplementTo";
238

    
239
                relClass = ResultResult.Supplement.RelName.isSupplementedBy.toString();
240
                inverseRelClass = ResultResult.Supplement.RelName.isSupplementTo.toString();
241
                resultRel.setSupplement(ResultResult.Supplement.newBuilder()
242
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
243
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
244
                                .build())
245
                        .build());
246
                resultInverseRel.setSupplement(ResultResult.Supplement.newBuilder()
247
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
248
                                .setSemantics(getQualifier(inverseRelClass, "dnet:result_result_relations"))
249
                                .build())
250
                        .build());
251
                break;
252
            }
253
            case "isSupplementTo": {
254
                cfRelation = "resultResult_supplement_isSupplementTo";
255
                cfInverseRelation = "resultResult_supplement_isSupplementedBy";
256
                inverseRelClass = ResultResult.Supplement.RelName.isSupplementedBy.toString();
257
                relClass = ResultResult.Supplement.RelName.isSupplementTo.toString();
258
                resultInverseRel.setSupplement(ResultResult.Supplement.newBuilder()
259
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
260
                                .setSemantics(getQualifier(inverseRelClass, "dnet:result_result_relations"))
261
                                .build())
262
                        .build());
263
                resultRel.setSupplement(ResultResult.Supplement.newBuilder()
264
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
265
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
266
                                .build())
267
                        .build());
268
                break;
269
            }
270
            default: {
271
                cfRelation = "resultResult_publicationDataset_isRelatedTo";
272
                cfInverseRelation = "resultResult_publicationDataset_isRelatedTo";
273
                relClass = ResultResult.PublicationDataset.RelName.isRelatedTo.toString();
274
                inverseRelClass = relClass;
275
                resultInverseRel.setPublicationDataset(ResultResult.PublicationDataset.newBuilder()
276
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
277
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
278
                                .build())
279
                        .build());
280
                resultRel = resultInverseRel;
281
            }
282
        }
283

    
284
        final List<AtomicAction> actions = new ArrayList<>();
285
        actions.add(createResultResultRelation(sourceId, targetId, collectedFrom, resultRel.build(), relClass, cfRelation, factory, setName, agent));
286
        actions.add(createResultResultRelation(targetId, sourceId, collectedFrom, resultInverseRel.build(), inverseRelClass, cfInverseRelation, factory, setName, agent));
287

    
288
        return actions;
289
    }
290

    
291
    private static List<ExternalReference> generateExternalReference(final List<JsonObject> jsonRels) {
292
        final List<ExternalReference> result = new ArrayList<>();
293

    
294
        jsonRels.forEach(it -> {
295
            ExternalReference.Builder builder = ExternalReference.newBuilder();
296
            if("url".equals(getStringValue(it.getAsJsonObject("id"), "schema"))) {
297
                builder.setUrl(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "identifier")));
298
            }
299
            result.add(builder
300
                    .setRefidentifier(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "identifier")))
301
                    .setSitename(Objects.requireNonNull(getStringValue(it, "collectedFrom")))
302
                    .setQualifier(Qualifier.newBuilder()
303
                            .setClassid(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "schema")))
304
                            .setClassname(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "schema")))
305
                            .setSchemename("dnet:externalReference_typologies")
306
                            .setSchemeid("dnet:externalReference_typologies")
307
                            .build())
308
                    .build());
309
        });
310
        return result;
311
    }
312

    
313
    private static List<JsonObject> extractRelations(final JsonObject rootElement, final String fieldType) {
314
        final List<JsonObject> result = new ArrayList<>();
315
        if (rootElement.has(fieldType) && rootElement.get(fieldType).isJsonArray()) {
316
            final JsonArray asJsonArray = rootElement.getAsJsonArray(fieldType);
317
            asJsonArray.forEach(it -> result.add(it.getAsJsonObject()));
318
        }
319
        return result;
320
    }
321

    
322
    public static Qualifier getQualifier(final String classValue, final String schemeValue) {
323

    
324
        return Qualifier.newBuilder()
325
                .setSchemeid(schemeValue)
326
                .setSchemename(schemeValue)
327
                .setClassname(classValue)
328
                .setClassid(classValue)
329
                .build();
330
    }
331

    
332
    private static StructuredProperty getPid(final JsonObject localIdentifier, final Map<String, ScholExplorerConfiguration> conf) {
333
        final String pidType = getStringValue(localIdentifier, "type");
334
        final ScholExplorerConfiguration configuration = conf.get(pidType);
335
        if (configuration.getCleandPidType() == null) {
336
            return null;
337
        }
338
        final String pid = getStringValue(localIdentifier, "id");
339
        return StructuredProperty.newBuilder()
340
                .setValue(pid)
341
                .setQualifier(getQualifier(configuration.getCleandPidType(), "dnet:pid_types"))
342
                .build();
343
    }
344

    
345
    public static String getStringValue(final JsonObject root, final String key) {
346
        if (root.has(key) && !root.get(key).isJsonNull())
347
            return root.get(key).getAsString();
348
        return null;
349
    }
350

    
351
    public static List<String> getArrayValues(final JsonObject root, final String key) {
352
        if (root.has(key) && root.get(key).isJsonArray()) {
353
            final JsonArray asJsonArray = root.get(key).getAsJsonArray();
354
            final List<String> result = new ArrayList<>();
355

    
356

    
357
            asJsonArray.forEach(it -> {
358
                if (StringUtils.isNotBlank(it.getAsString())) {
359
                    result.add(it.getAsString());
360
                }
361
            });
362
            return result;
363
        }
364
        return new ArrayList<>();
365
    }
366

    
367
}
(9-9/10)